001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.UncheckedIOException; 025import java.math.BigInteger; 026import java.nio.ByteBuffer; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.util.ArrayList; 030import java.util.Collections; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034 035import org.apache.commons.compress.archivers.zip.ZipEncoding; 036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 037import org.apache.commons.compress.utils.CharsetNames; 038import org.apache.commons.compress.utils.IOUtils; 039 040/** 041 * This class provides static utility methods to work with byte streams. 042 * 043 * @Immutable 044 */ 045// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 046public class TarUtils { 047 048 private static final int BYTE_MASK = 255; 049 050 static final ZipEncoding DEFAULT_ENCODING = 051 ZipEncodingHelper.getZipEncoding(null); 052 053 /** 054 * Encapsulates the algorithms used up to Commons Compress 1.3 as 055 * ZipEncoding. 056 */ 057 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 058 @Override 059 public boolean canEncode(final String name) { return true; } 060 061 @Override 062 public String decode(final byte[] buffer) { 063 final int length = buffer.length; 064 final StringBuilder result = new StringBuilder(length); 065 066 for (final byte b : buffer) { 067 if (b == 0) { // Trailing null 068 break; 069 } 070 result.append((char) (b & 0xFF)); // Allow for sign-extension 071 } 072 073 return result.toString(); 074 } 075 076 @Override 077 public ByteBuffer encode(final String name) { 078 final int length = name.length(); 079 final byte[] buf = new byte[length]; 080 081 // copy until end of input or output is reached. 082 for (int i = 0; i < length; ++i) { 083 buf[i] = (byte) name.charAt(i); 084 } 085 return ByteBuffer.wrap(buf); 086 } 087 }; 088 089 /** 090 * Compute the checksum of a tar entry header. 091 * 092 * @param buf The tar entry's header buffer. 093 * @return The computed checksum. 094 */ 095 public static long computeCheckSum(final byte[] buf) { 096 long sum = 0; 097 098 for (final byte element : buf) { 099 sum += BYTE_MASK & element; 100 } 101 102 return sum; 103 } 104 105 // Helper method to generate the exception message 106 private static String exceptionMessage(final byte[] buffer, final int offset, 107 final int length, final int current, final byte currentByte) { 108 // default charset is good enough for an exception message, 109 // 110 // the alternative was to modify parseOctal and 111 // parseOctalOrBinary to receive the ZipEncoding of the 112 // archive (deprecating the existing public methods, of 113 // course) and dealing with the fact that ZipEncoding#decode 114 // can throw an IOException which parseOctal* doesn't declare 115 String string = new String(buffer, offset, length, Charset.defaultCharset()); 116 117 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 118 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; 119 } 120 121 private static void formatBigIntegerBinary(final long value, final byte[] buf, 122 final int offset, 123 final int length, 124 final boolean negative) { 125 final BigInteger val = BigInteger.valueOf(value); 126 final byte[] b = val.toByteArray(); 127 final int len = b.length; 128 if (len > length - 1) { 129 throw new IllegalArgumentException("Value " + value + 130 " is too large for " + length + " byte field."); 131 } 132 final int off = offset + length - len; 133 System.arraycopy(b, 0, buf, off, len); 134 final byte fill = (byte) (negative ? 0xff : 0); 135 for (int i = offset + 1; i < off; i++) { 136 buf[i] = fill; 137 } 138 } 139 140 /** 141 * Writes an octal value into a buffer. 142 * 143 * Uses {@link #formatUnsignedOctalString} to format 144 * the value as an octal string with leading zeros. 145 * The converted number is followed by NUL and then space. 146 * 147 * @param value The value to convert 148 * @param buf The destination buffer 149 * @param offset The starting offset into the buffer. 150 * @param length The size of the buffer. 151 * @return The updated value of offset, i.e. offset+length 152 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 153 */ 154 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 155 156 int idx=length-2; // for NUL and space 157 formatUnsignedOctalString(value, buf, offset, idx); 158 159 buf[offset + idx++] = 0; // Trailing null 160 buf[offset + idx] = (byte) ' '; // Trailing space 161 162 return offset + length; 163 } 164 165 private static void formatLongBinary(final long value, final byte[] buf, 166 final int offset, final int length, 167 final boolean negative) { 168 final int bits = (length - 1) * 8; 169 final long max = 1L << bits; 170 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 171 if (val < 0 || val >= max) { 172 throw new IllegalArgumentException("Value " + value + 173 " is too large for " + length + " byte field."); 174 } 175 if (negative) { 176 val ^= max - 1; 177 val++; 178 val |= 0xffL << bits; 179 } 180 for (int i = offset + length - 1; i >= offset; i--) { 181 buf[i] = (byte) val; 182 val >>= 8; 183 } 184 } 185 186 /** 187 * Write an octal long integer into a buffer. 188 * 189 * Uses {@link #formatUnsignedOctalString} to format 190 * the value as an octal string with leading zeros. 191 * The converted number is followed by a space. 192 * 193 * @param value The value to write as octal 194 * @param buf The destinationbuffer. 195 * @param offset The starting offset into the buffer. 196 * @param length The length of the buffer 197 * @return The updated offset 198 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 199 */ 200 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 201 202 final int idx=length-1; // For space 203 204 formatUnsignedOctalString(value, buf, offset, idx); 205 buf[offset + idx] = (byte) ' '; // Trailing space 206 207 return offset + length; 208 } 209 210 /** 211 * Write a long integer into a buffer as an octal string if this 212 * will fit, or as a binary number otherwise. 213 * 214 * Uses {@link #formatUnsignedOctalString} to format 215 * the value as an octal string with leading zeros. 216 * The converted number is followed by a space. 217 * 218 * @param value The value to write into the buffer. 219 * @param buf The destination buffer. 220 * @param offset The starting offset into the buffer. 221 * @param length The length of the buffer. 222 * @return The updated offset. 223 * @throws IllegalArgumentException if the value (and trailer) 224 * will not fit in the buffer. 225 * @since 1.4 226 */ 227 public static int formatLongOctalOrBinaryBytes( 228 final long value, final byte[] buf, final int offset, final int length) { 229 230 // Check whether we are dealing with UID/GID or SIZE field 231 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 232 233 final boolean negative = value < 0; 234 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 235 return formatLongOctalBytes(value, buf, offset, length); 236 } 237 238 if (length < 9) { 239 formatLongBinary(value, buf, offset, length, negative); 240 } else { 241 formatBigIntegerBinary(value, buf, offset, length, negative); 242 } 243 244 buf[offset] = (byte) (negative ? 0xff : 0x80); 245 return offset + length; 246 } 247 248 /** 249 * Copy a name into a buffer. 250 * Copies characters from the name into the buffer 251 * starting at the specified offset. 252 * If the buffer is longer than the name, the buffer 253 * is filled with trailing NULs. 254 * If the name is longer than the buffer, 255 * the output is truncated. 256 * 257 * @param name The header name from which to copy the characters. 258 * @param buf The buffer where the name is to be stored. 259 * @param offset The starting offset into the buffer 260 * @param length The maximum number of header bytes to copy. 261 * @return The updated offset, i.e. offset + length 262 */ 263 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 264 try { 265 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 266 } catch (final IOException ex) { // NOSONAR 267 try { 268 return formatNameBytes(name, buf, offset, length, 269 FALLBACK_ENCODING); 270 } catch (final IOException ex2) { 271 // impossible 272 throw new UncheckedIOException(ex2); //NOSONAR 273 } 274 } 275 } 276 277 /** 278 * Copy a name into a buffer. 279 * Copies characters from the name into the buffer 280 * starting at the specified offset. 281 * If the buffer is longer than the name, the buffer 282 * is filled with trailing NULs. 283 * If the name is longer than the buffer, 284 * the output is truncated. 285 * 286 * @param name The header name from which to copy the characters. 287 * @param buf The buffer where the name is to be stored. 288 * @param offset The starting offset into the buffer 289 * @param length The maximum number of header bytes to copy. 290 * @param encoding name of the encoding to use for file names 291 * @since 1.4 292 * @return The updated offset, i.e. offset + length 293 * @throws IOException on error 294 */ 295 public static int formatNameBytes(final String name, final byte[] buf, final int offset, 296 final int length, 297 final ZipEncoding encoding) 298 throws IOException { 299 int len = name.length(); 300 ByteBuffer b = encoding.encode(name); 301 while (b.limit() > length && len > 0) { 302 b = encoding.encode(name.substring(0, --len)); 303 } 304 final int limit = b.limit() - b.position(); 305 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 306 307 // Pad any remaining output bytes with NUL 308 for (int i = limit; i < length; ++i) { 309 buf[offset + i] = 0; 310 } 311 312 return offset + length; 313 } 314 315 /** 316 * Write an octal integer into a buffer. 317 * 318 * Uses {@link #formatUnsignedOctalString} to format 319 * the value as an octal string with leading zeros. 320 * The converted number is followed by space and NUL 321 * 322 * @param value The value to write 323 * @param buf The buffer to receive the output 324 * @param offset The starting offset into the buffer 325 * @param length The size of the output buffer 326 * @return The updated offset, i.e. offset+length 327 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 328 */ 329 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 330 331 int idx=length-2; // For space and trailing null 332 formatUnsignedOctalString(value, buf, offset, idx); 333 334 buf[offset + idx++] = (byte) ' '; // Trailing space 335 buf[offset + idx] = 0; // Trailing null 336 337 return offset + length; 338 } 339 340 /** 341 * Fill buffer with unsigned octal number, padded with leading zeroes. 342 * 343 * @param value number to convert to octal - treated as unsigned 344 * @param buffer destination buffer 345 * @param offset starting offset in buffer 346 * @param length length of buffer to fill 347 * @throws IllegalArgumentException if the value will not fit in the buffer 348 */ 349 public static void formatUnsignedOctalString(final long value, final byte[] buffer, 350 final int offset, final int length) { 351 int remaining = length; 352 remaining--; 353 if (value == 0) { 354 buffer[offset + remaining--] = (byte) '0'; 355 } else { 356 long val = value; 357 for (; remaining >= 0 && val != 0; --remaining) { 358 // CheckStyle:MagicNumber OFF 359 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 360 val = val >>> 3; 361 // CheckStyle:MagicNumber ON 362 } 363 if (val != 0){ 364 throw new IllegalArgumentException 365 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 366 } 367 } 368 369 for (; remaining >= 0; --remaining) { // leading zeros 370 buffer[offset + remaining] = (byte) '0'; 371 } 372 } 373 374 private static long parseBinaryBigInteger(final byte[] buffer, 375 final int offset, 376 final int length, 377 final boolean negative) { 378 final byte[] remainder = new byte[length - 1]; 379 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 380 BigInteger val = new BigInteger(remainder); 381 if (negative) { 382 // 2's complement 383 val = val.add(BigInteger.valueOf(-1)).not(); 384 } 385 if (val.bitLength() > 63) { 386 throw new IllegalArgumentException("At offset " + offset + ", " 387 + length + " byte binary number" 388 + " exceeds maximum signed long" 389 + " value"); 390 } 391 return negative ? -val.longValue() : val.longValue(); 392 } 393 394 private static long parseBinaryLong(final byte[] buffer, final int offset, 395 final int length, 396 final boolean negative) { 397 if (length >= 9) { 398 throw new IllegalArgumentException("At offset " + offset + ", " 399 + length + " byte binary number" 400 + " exceeds maximum signed long" 401 + " value"); 402 } 403 long val = 0; 404 for (int i = 1; i < length; i++) { 405 val = (val << 8) + (buffer[offset + i] & 0xff); 406 } 407 if (negative) { 408 // 2's complement 409 val--; 410 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 411 } 412 return negative ? -val : val; 413 } 414 415 /** 416 * Parse a boolean byte from a buffer. 417 * Leading spaces and NUL are ignored. 418 * The buffer may contain trailing spaces or NULs. 419 * 420 * @param buffer The buffer from which to parse. 421 * @param offset The offset into the buffer from which to parse. 422 * @return The boolean value of the bytes. 423 * @throws IllegalArgumentException if an invalid byte is detected. 424 */ 425 public static boolean parseBoolean(final byte[] buffer, final int offset) { 426 return buffer[offset] == 1; 427 } 428 429 /** 430 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 431 * GNU.sparse.map 432 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 433 * 434 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 435 * @return unmodifiable list of sparse headers parsed from sparse map 436 * @throws IOException Corrupted TAR archive. 437 * @since 1.21 438 */ 439 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) 440 throws IOException { 441 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 442 final String[] sparseHeaderStrings = sparseMap.split(","); 443 if (sparseHeaderStrings.length % 2 == 1) { 444 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 445 } 446 447 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 448 long sparseOffset; 449 try { 450 sparseOffset = Long.parseLong(sparseHeaderStrings[i]); 451 } catch (final NumberFormatException ex) { 452 throw new IOException("Corrupted TAR archive." 453 + " Sparse struct offset contains a non-numeric value"); 454 } 455 if (sparseOffset < 0) { 456 throw new IOException("Corrupted TAR archive." 457 + " Sparse struct offset contains negative value"); 458 } 459 long sparseNumbytes; 460 try { 461 sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]); 462 } catch (final NumberFormatException ex) { 463 throw new IOException("Corrupted TAR archive." 464 + " Sparse struct numbytes contains a non-numeric value"); 465 } 466 if (sparseNumbytes < 0) { 467 throw new IOException("Corrupted TAR archive." 468 + " Sparse struct numbytes contains negative value"); 469 } 470 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 471 } 472 473 return Collections.unmodifiableList(sparseHeaders); 474 } 475 476 /** 477 * Parse an entry name from a buffer. 478 * Parsing stops when a NUL is found 479 * or the buffer length is reached. 480 * 481 * @param buffer The buffer from which to parse. 482 * @param offset The offset into the buffer from which to parse. 483 * @param length The maximum number of bytes to parse. 484 * @return The entry name. 485 */ 486 public static String parseName(final byte[] buffer, final int offset, final int length) { 487 try { 488 return parseName(buffer, offset, length, DEFAULT_ENCODING); 489 } catch (final IOException ex) { // NOSONAR 490 try { 491 return parseName(buffer, offset, length, FALLBACK_ENCODING); 492 } catch (final IOException ex2) { 493 // impossible 494 throw new UncheckedIOException(ex2); //NOSONAR 495 } 496 } 497 } 498 499 /** 500 * Parse an entry name from a buffer. 501 * Parsing stops when a NUL is found 502 * or the buffer length is reached. 503 * 504 * @param buffer The buffer from which to parse. 505 * @param offset The offset into the buffer from which to parse. 506 * @param length The maximum number of bytes to parse. 507 * @param encoding name of the encoding to use for file names 508 * @since 1.4 509 * @return The entry name. 510 * @throws IOException on error 511 */ 512 public static String parseName(final byte[] buffer, final int offset, 513 final int length, 514 final ZipEncoding encoding) 515 throws IOException { 516 517 int len = 0; 518 for (int i = offset; len < length && buffer[i] != 0; i++) { 519 len++; 520 } 521 if (len > 0) { 522 final byte[] b = new byte[len]; 523 System.arraycopy(buffer, offset, b, 0, len); 524 return encoding.decode(b); 525 } 526 return ""; 527 } 528 529 /** 530 * Parse an octal string from a buffer. 531 * 532 * <p>Leading spaces are ignored. 533 * The buffer must contain a trailing space or NUL, 534 * and may contain an additional trailing space or NUL.</p> 535 * 536 * <p>The input buffer is allowed to contain all NULs, 537 * in which case the method returns 0L 538 * (this allows for missing fields).</p> 539 * 540 * <p>To work-around some tar implementations that insert a 541 * leading NUL this method returns 0 if it detects a leading NUL 542 * since Commons Compress 1.4.</p> 543 * 544 * @param buffer The buffer from which to parse. 545 * @param offset The offset into the buffer from which to parse. 546 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 547 * @return The long value of the octal string. 548 * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected. 549 */ 550 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 551 long result = 0; 552 int end = offset + length; 553 int start = offset; 554 555 if (length < 2) { 556 throw new IllegalArgumentException("Length " + length + " must be at least 2"); 557 } 558 559 if (buffer[start] == 0) { 560 return 0L; 561 } 562 563 // Skip leading spaces 564 while (start < end) { 565 if (buffer[start] != ' ') { 566 break; 567 } 568 start++; 569 } 570 571 // Trim all trailing NULs and spaces. 572 // The ustar and POSIX tar specs require a trailing NUL or 573 // space but some implementations use the extra digit for big 574 // sizes/uids/gids ... 575 byte trailer = buffer[end - 1]; 576 while (start < end && (trailer == 0 || trailer == ' ')) { 577 end--; 578 trailer = buffer[end - 1]; 579 } 580 581 for (; start < end; start++) { 582 final byte currentByte = buffer[start]; 583 // CheckStyle:MagicNumber OFF 584 if (currentByte < '0' || currentByte > '7') { 585 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); 586 } 587 result = (result << 3) + (currentByte - '0'); // convert from ASCII 588 // CheckStyle:MagicNumber ON 589 } 590 591 return result; 592 } 593 594 /** 595 * Compute the value contained in a byte buffer. If the most 596 * significant bit of the first byte in the buffer is set, this 597 * bit is ignored and the rest of the buffer is interpreted as a 598 * binary number. Otherwise, the buffer is interpreted as an 599 * octal number as per the parseOctal function above. 600 * 601 * @param buffer The buffer from which to parse. 602 * @param offset The offset into the buffer from which to parse. 603 * @param length The maximum number of bytes to parse. 604 * @return The long value of the octal or binary string. 605 * @throws IllegalArgumentException if the trailing space/NUL is 606 * missing or an invalid byte is detected in an octal number, or 607 * if a binary number would exceed the size of a signed long 608 * 64-bit integer. 609 * @since 1.4 610 */ 611 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 612 final int length) { 613 614 if ((buffer[offset] & 0x80) == 0) { 615 return parseOctal(buffer, offset, length); 616 } 617 final boolean negative = buffer[offset] == (byte) 0xff; 618 if (length < 9) { 619 return parseBinaryLong(buffer, offset, length, negative); 620 } 621 return parseBinaryBigInteger(buffer, offset, length, negative); 622 } 623 624 /** 625 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 626 * GNU.sparse.map 627 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 628 * 629 * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You 630 * should use {@link #parseFromPAX01SparseHeaders} directly instead. 631 * 632 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 633 * @return sparse headers parsed from sparse map 634 * @deprecated use #parseFromPAX01SparseHeaders instead 635 */ 636 @Deprecated 637 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) { 638 try { 639 return parseFromPAX01SparseHeaders(sparseMap); 640 } catch (final IOException ex) { 641 throw new UncheckedIOException(ex.getMessage(), ex); 642 } 643 } 644 645 /** 646 * For PAX Format 1.X: 647 * The sparse map itself is stored in the file data block, preceding the actual file data. 648 * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. 649 * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers 650 * giving the offset and size of the data block it describes. 651 * @param inputStream parsing source. 652 * @param recordSize The size the TAR header 653 * @return sparse headers 654 * @throws IOException if an I/O error occurs. 655 */ 656 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 657 // for 1.X PAX Headers 658 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 659 long bytesRead = 0; 660 661 long[] readResult = readLineOfNumberForPax1X(inputStream); 662 long sparseHeadersCount = readResult[0]; 663 if (sparseHeadersCount < 0) { 664 // overflow while reading number? 665 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 666 } 667 bytesRead += readResult[1]; 668 while (sparseHeadersCount-- > 0) { 669 readResult = readLineOfNumberForPax1X(inputStream); 670 final long sparseOffset = readResult[0]; 671 if (sparseOffset < 0) { 672 throw new IOException("Corrupted TAR archive." 673 + " Sparse header block offset contains negative value"); 674 } 675 bytesRead += readResult[1]; 676 677 readResult = readLineOfNumberForPax1X(inputStream); 678 final long sparseNumbytes = readResult[0]; 679 if (sparseNumbytes < 0) { 680 throw new IOException("Corrupted TAR archive." 681 + " Sparse header block numbytes contains negative value"); 682 } 683 bytesRead += readResult[1]; 684 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 685 } 686 687 // skip the rest of this record data 688 final long bytesToSkip = recordSize - bytesRead % recordSize; 689 IOUtils.skip(inputStream, bytesToSkip); 690 return sparseHeaders; 691 } 692 693 /** 694 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 695 * may appear multi times, and they look like: 696 * 697 * GNU.sparse.size=size 698 * GNU.sparse.numblocks=numblocks 699 * repeat numblocks times 700 * GNU.sparse.offset=offset 701 * GNU.sparse.numbytes=numbytes 702 * end repeat 703 * 704 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 705 * 706 * GNU.sparse.map 707 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 708 * 709 * @param inputStream input stream to read keys and values 710 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 711 * the sparse headers need to be stored in an array, not a map 712 * @param globalPaxHeaders global PAX headers of the tar archive 713 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 714 * @throws IOException if an I/O error occurs. 715 * @deprecated use the four-arg version instead 716 */ 717 @Deprecated 718 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders) 719 throws IOException { 720 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 721 } 722 723 /** 724 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 725 * may appear multi times, and they look like: 726 * 727 * GNU.sparse.size=size 728 * GNU.sparse.numblocks=numblocks 729 * repeat numblocks times 730 * GNU.sparse.offset=offset 731 * GNU.sparse.numbytes=numbytes 732 * end repeat 733 * 734 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 735 * 736 * GNU.sparse.map 737 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 738 * 739 * @param inputStream input stream to read keys and values 740 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 741 * the sparse headers need to be stored in an array, not a map 742 * @param globalPaxHeaders global PAX headers of the tar archive 743 * @param headerSize total size of the PAX header, will be ignored if negative 744 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 745 * @throws IOException if an I/O error occurs. 746 * @since 1.21 747 */ 748 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, 749 final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders, 750 final long headerSize) throws IOException { 751 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 752 Long offset = null; 753 // Format is "length keyword=value\n"; 754 int totalRead = 0; 755 while(true) { // get length 756 int ch; 757 int len = 0; 758 int read = 0; 759 while((ch = inputStream.read()) != -1) { 760 read++; 761 totalRead++; 762 if (ch == '\n') { // blank line in header 763 break; 764 } 765 if (ch == ' '){ // End of length string 766 // Get keyword 767 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 768 while((ch = inputStream.read()) != -1) { 769 read++; 770 totalRead++; 771 if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) { 772 break; 773 } 774 if (ch == '='){ // end of keyword 775 final String keyword = coll.toString(CharsetNames.UTF_8); 776 // Get rest of entry 777 final int restLen = len - read; 778 if (restLen <= 1) { // only NL 779 headers.remove(keyword); 780 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 781 throw new IOException("Paxheader value size " + restLen 782 + " exceeds size of header record"); 783 } else { 784 final byte[] rest = IOUtils.readRange(inputStream, restLen); 785 final int got = rest.length; 786 if (got != restLen) { 787 throw new IOException("Failed to read " 788 + "Paxheader. Expected " 789 + restLen 790 + " bytes, read " 791 + got); 792 } 793 totalRead += restLen; 794 // Drop trailing NL 795 if (rest[restLen - 1] != '\n') { 796 throw new IOException("Failed to read Paxheader." 797 + "Value should end with a newline"); 798 } 799 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8); 800 headers.put(keyword, value); 801 802 // for 0.0 PAX Headers 803 if (keyword.equals(TarGnuSparseKeys.OFFSET)) { 804 if (offset != null) { 805 // previous GNU.sparse.offset header but no numBytes 806 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 807 } 808 try { 809 offset = Long.valueOf(value); 810 } catch (final NumberFormatException ex) { 811 throw new IOException("Failed to read Paxheader." 812 + TarGnuSparseKeys.OFFSET + " contains a non-numeric value"); 813 } 814 if (offset < 0) { 815 throw new IOException("Failed to read Paxheader." 816 + TarGnuSparseKeys.OFFSET + " contains negative value"); 817 } 818 } 819 820 // for 0.0 PAX Headers 821 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) { 822 if (offset == null) { 823 throw new IOException("Failed to read Paxheader." 824 + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up."); 825 } 826 long numbytes; 827 try { 828 numbytes = Long.parseLong(value); 829 } catch (final NumberFormatException ex) { 830 throw new IOException("Failed to read Paxheader." 831 + TarGnuSparseKeys.NUMBYTES + " contains a non-numeric value."); 832 } 833 if (numbytes < 0) { 834 throw new IOException("Failed to read Paxheader." 835 + TarGnuSparseKeys.NUMBYTES + " contains negative value"); 836 } 837 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 838 offset = null; 839 } 840 } 841 break; 842 } 843 coll.write((byte) ch); 844 } 845 break; // Processed single header 846 } 847 848 // COMPRESS-530 : throw if we encounter a non-number while reading length 849 if (ch < '0' || ch > '9') { 850 throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); 851 } 852 853 len *= 10; 854 len += ch - '0'; 855 } 856 if (ch == -1){ // EOF 857 break; 858 } 859 } 860 if (offset != null) { 861 // offset but no numBytes 862 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 863 } 864 return headers; 865 } 866 867 /** 868 * Parses the content of a PAX 1.0 sparse block. 869 * @since 1.20 870 * @param buffer The buffer from which to parse. 871 * @param offset The offset into the buffer from which to parse. 872 * @return a parsed sparse struct 873 */ 874 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 875 final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN); 876 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN); 877 878 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 879 } 880 881 /** 882 * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. 883 * It consists of a series of decimal numbers delimited by newlines. 884 * 885 * @param inputStream the input stream of the tar file 886 * @return the decimal number delimited by '\n', and the bytes read from input stream 887 * @throws IOException 888 */ 889 private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { 890 int number; 891 long result = 0; 892 long bytesRead = 0; 893 894 while ((number = inputStream.read()) != '\n') { 895 bytesRead += 1; 896 if (number == -1) { 897 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 898 } 899 if (number < '0' || number > '9') { 900 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 901 } 902 result = result * 10 + (number - '0'); 903 } 904 bytesRead += 1; 905 906 return new long[]{result, bytesRead}; 907 } 908 909 /** 910 * @since 1.21 911 */ 912 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) 913 throws IOException { 914 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 915 for (int i = 0; i < entries; i++) { 916 try { 917 final TarArchiveStructSparse sparseHeader = 918 parseSparse(buffer, offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN)); 919 920 if (sparseHeader.getOffset() < 0) { 921 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 922 } 923 if (sparseHeader.getNumbytes() < 0) { 924 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 925 } 926 sparseHeaders.add(sparseHeader); 927 } catch (final IllegalArgumentException ex) { 928 // thrown internally by parseOctalOrBinary 929 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 930 } 931 } 932 return Collections.unmodifiableList(sparseHeaders); 933 } 934 935 /** 936 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: 937 * <blockquote> 938 * The checksum is calculated by taking the sum of the unsigned byte values 939 * of the header block with the eight checksum bytes taken to be ascii 940 * spaces (decimal value 32). It is stored as a six digit octal number with 941 * leading zeroes followed by a NUL and then a space. Various 942 * implementations do not adhere to this format. For better compatibility, 943 * ignore leading and trailing whitespace, and get the first six digits. In 944 * addition, some historic tar implementations treated bytes as signed. 945 * Implementations typically calculate the checksum both ways, and treat it 946 * as good if either the signed or unsigned sum matches the included 947 * checksum. 948 * </blockquote> 949 * <p> 950 * The return value of this method should be treated as a best-effort 951 * heuristic rather than an absolute and final truth. The checksum 952 * verification logic may well evolve over time as more special cases 953 * are encountered. 954 * 955 * @param header tar header 956 * @return whether the checksum is reasonably good 957 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 958 * @since 1.5 959 */ 960 public static boolean verifyCheckSum(final byte[] header) { 961 final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN); 962 long unsignedSum = 0; 963 long signedSum = 0; 964 965 for (int i = 0; i < header.length; i++) { 966 byte b = header[i]; 967 if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) { 968 b = ' '; 969 } 970 unsignedSum += 0xff & b; 971 signedSum += b; 972 } 973 return storedSum == unsignedSum || storedSum == signedSum; 974 } 975 976 /** Private constructor to prevent instantiation of this utility class. */ 977 private TarUtils(){ 978 } 979 980}