001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018/* 019 * This package is based on the work done by Timothy Gerard Endres 020 * (time@ice.com) to whom the Ant project is very grateful for his great code. 021 */ 022 023package org.apache.commons.compress.archivers.tar; 024 025import java.io.ByteArrayOutputStream; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034 035import org.apache.commons.compress.archivers.ArchiveEntry; 036import org.apache.commons.compress.archivers.ArchiveInputStream; 037import org.apache.commons.compress.archivers.zip.ZipEncoding; 038import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 039import org.apache.commons.compress.utils.ArchiveUtils; 040import org.apache.commons.compress.utils.BoundedInputStream; 041import org.apache.commons.compress.utils.IOUtils; 042 043/** 044 * The TarInputStream reads a UNIX tar archive as an InputStream. 
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {

    /** Size of the scratch buffer used while draining long-name entries. */
    private static final int SMALL_BUFFER_SIZE = 256;

    /**
     * Checks if the signature matches what is expected for a tar file.
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        // need at least the magic and version fields of the header to decide
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        // POSIX (ustar) magic + version
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ) {
            return true;
        }
        // GNU magic with either of the two version variants GNU tar emits
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            (
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ||
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            )
            ) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
    }

    /** Scratch buffer used when draining long-name/long-link entries. */
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The size the TAR header */
    private final int recordSize;

    /** The buffer to
store the TAR header **/
    private final byte[] recordBuffer;

    /** The size of a block */
    private final int blockSize;

    /** True if file has hit EOF */
    private boolean hasHitEOF;

    /** Size of the current entry */
    private long entrySize;

    /** How far into the entry the stream is at */
    private long entryOffset;

    /** An input stream to read from */
    private final InputStream inputStream;

    /** Input streams for reading sparse entries **/
    private List<InputStream> sparseInputStreams;

    /** the index of current input stream being read when reading sparse entries */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry */
    private TarArchiveEntry currEntry;

    /** The encoding of the file */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    /** Whether illegal header values are tolerated rather than rejected */
    private final boolean lenient;

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(final InputStream is) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final boolean lenient) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding) {
        this(is, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding, final boolean lenient) {
        this.inputStream = is;
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize,
                                 final String encoding) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final String encoding) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
             encoding);
    }

    /**
     * Applies the given PAX headers and sparse headers to the current entry.
     *
     * @param headers the parsed PAX key/value pairs to merge into the entry
     * @param sparseHeaders the sparse headers to attach to the entry
     * @throws IOException on error
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
255 * 256 * @return The number of available bytes for the current entry. 257 * @throws IOException for signature 258 */ 259 @Override 260 public int available() throws IOException { 261 if (isDirectory()) { 262 return 0; 263 } 264 265 if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) { 266 return Integer.MAX_VALUE; 267 } 268 return (int) (currEntry.getRealSize() - entryOffset); 269 } 270 271 272 /** 273 * Build the input streams consisting of all-zero input streams and non-zero input streams. 274 * When reading from the non-zero input streams, the data is actually read from the original input stream. 275 * The size of each input stream is introduced by the sparse headers. 276 * 277 * NOTE : Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 278 * 0 size input streams because they are meaningless. 279 */ 280 private void buildSparseInputStreams() throws IOException { 281 currentSparseInputStreamIndex = -1; 282 sparseInputStreams = new ArrayList<>(); 283 284 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders(); 285 286 // Stream doesn't need to be closed at all as it doesn't use any resources 287 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR 288 // logical offset into the extracted entry 289 long offset = 0; 290 for (final TarArchiveStructSparse sparseHeader : sparseHeaders) { 291 final long zeroBlockSize = sparseHeader.getOffset() - offset; 292 if (zeroBlockSize < 0) { 293 // sparse header says to move backwards inside the extracted entry 294 throw new IOException("Corrupted struct sparse detected"); 295 } 296 297 // only store the zero block if it is not empty 298 if (zeroBlockSize > 0) { 299 sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset)); 300 } 301 302 // only store the input streams with non-zero size 303 if (sparseHeader.getNumbytes() > 0) { 304 sparseInputStreams.add(new 
BoundedInputStream(inputStream, sparseHeader.getNumbytes())); 305 } 306 307 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); 308 } 309 310 if (!sparseInputStreams.isEmpty()) { 311 currentSparseInputStreamIndex = 0; 312 } 313 } 314 315 /** 316 * Whether this class is able to read the given entry. 317 * 318 * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry} 319 */ 320 @Override 321 public boolean canReadEntryData(final ArchiveEntry ae) { 322 return ae instanceof TarArchiveEntry; 323 } 324 325 /** 326 * Closes this stream. Calls the TarBuffer's close() method. 327 * @throws IOException on error 328 */ 329 @Override 330 public void close() throws IOException { 331 // Close all the input streams in sparseInputStreams 332 if (sparseInputStreams != null) { 333 for (final InputStream inputStream : sparseInputStreams) { 334 inputStream.close(); 335 } 336 } 337 338 inputStream.close(); 339 } 340 341 /** 342 * This method is invoked once the end of the archive is hit, it 343 * tries to consume the remaining bytes under the assumption that 344 * the tool creating this archive has padded the last block. 
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        // bytes already consumed past the last full block boundary
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            final long skipped = IOUtils.skip(inputStream, blockSize - bytesReadOfLastBlock);
            count(skipped);
        }
    }

    /**
     * For FileInputStream, the skip always return the number you input, so we
     * need the available bytes to determine how many bytes are actually skipped
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped skipped bytes returned by inputStream.skip()
     * @param expected bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (inputStream instanceof FileInputStream) {
            // FileInputStream may report more skipped bytes than were available
            actuallySkipped = Math.min(skipped, available);
        }

        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }

        return actuallySkipped;
    }

    /**
     * Get the current TAR Archive Entry that this input stream is processing
     *
     * @return The current Archive Entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * Get the next entry in this tar archive as longname data.
     *
     * @return The next entry in the archive as longname data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        // position the stream at the entry the long name belongs to
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Returns the next Archive Entry in this Stream.
     *
     * @return the next entry,
     *         or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry, and read the header and instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);

            /* skip to the end of the last record */
            skipRecordPadding();
        }

        final byte[] headerBuf = getRecord();

        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        entryOffset = 0;
        entrySize = currEntry.getSize();

        // a GNU long-link meta entry stores the real link name in its data
        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        // a GNU long-name meta entry stores the real entry name in its data
        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()){ // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()){ // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()){ // Process sparse files
            readOldGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the posix header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();

        return currEntry;
    }

    /**
     * Get the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     *
     * <p>If there are no more entries in the archive, null will be
     * returned to indicate that the end of the archive has been
     * reached. At the same time the {@code hasHitEOF} marker will be
     * set to true.</p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            // consume the (optional) second EOF record plus block padding
            // before reporting end of archive
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Get the record size being used by this stream's buffer.
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return recordSize;
    }

    /** @return true once the end of the archive has been hit */
    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    /** @return true if there is a current entry and it is a directory */
    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Determine if an archive record indicate End of Archive. End of
     * archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
574 * @return true if the record data is an End of Archive 575 */ 576 protected boolean isEOFRecord(final byte[] record) { 577 return record == null || ArchiveUtils.isArrayZero(record, recordSize); 578 } 579 580 /** 581 * Since we do not support marking just yet, we do nothing. 582 * 583 * @param markLimit The limit to mark. 584 */ 585 @Override 586 public synchronized void mark(final int markLimit) { 587 } 588 589 /** 590 * Since we do not support marking just yet, we return false. 591 * 592 * @return False. 593 */ 594 @Override 595 public boolean markSupported() { 596 return false; 597 } 598 599 /** 600 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 601 * may appear multi times, and they look like: 602 * 603 * GNU.sparse.size=size 604 * GNU.sparse.numblocks=numblocks 605 * repeat numblocks times 606 * GNU.sparse.offset=offset 607 * GNU.sparse.numbytes=numbytes 608 * end repeat 609 * 610 * 611 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 612 * 613 * GNU.sparse.map 614 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 615 * 616 * 617 * For PAX Format 1.X: 618 * The sparse map itself is stored in the file data block, preceding the actual file data. 619 * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. 620 * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers 621 * giving the offset and size of the data block it describes. 
     * @throws IOException on error
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(inputStream, recordSize);
            currEntry.setSparseHeaders(sparseHeaders);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;

        if (isAtEOF() || isDirectory()) {
            return -1;
        }

        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }

        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }

        // never read past the logical end of the current entry
        numToRead = Math.min(numToRead, available());

        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = inputStream.read(buf, offset, numToRead);
        }

        if (totalRead == -1) {
            if (numToRead > 0) {
                // entry data promised by the header is missing
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }

        return totalRead;
    }

    /**
     * Reads the global PAX headers and positions the stream at the
     * entry that follows them.
     *
     * @throws IOException if the headers are not followed by an entry
     */
    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks,
     * including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            // keep reading continuation headers until the extended flag clears
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Read a record from the input stream and return the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readNow = IOUtils.readFully(inputStream, recordBuffer);
        count(readNow);
        if (readNow != recordSize) {
            // a short read means the archive ended inside a record
            return null;
        }

        return recordBuffer;
    }

    /**
     * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is
     * stored in tar files, and they are stored separately. The structure of non-zero data is introduced by the
     * sparse headers using the offset, where a block of non-zero data starts, and numbytes, the length of the
     * non-zero data block.
     * When reading sparse entries, the actual data is read out with "holes" and non-zero data combined together
     * according to the sparse headers.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.read(buf, offset, numToRead);
        }

        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }

        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);

        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }

        // if EOF of current input stream is meet, open a new input stream and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }

        // if the rest data of current input stream is not long enough, open a new input stream
        // and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }

            return readLen + readLenOfNext;
        }

        // if the rest data of current input stream is enough(which means readLen == len), just return readLen
        return readLen;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    /** Sets the end-of-archive marker for this stream. */
    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    /** Replaces the entry this stream currently exposes. */
    protected final void setCurrentEntry(final TarArchiveEntry e) {
        currEntry = e;
    }

    /**
     * Skips over and discards {@code n} bytes of data from this input
     * stream. The {@code skip} method may, for a variety of reasons, end
     * up skipping over some smaller number of bytes, possibly {@code 0}.
     * This may result from any of a number of conditions; reaching end of file
     * or end of entry before {@code n} bytes have been skipped; are only
     * two possibilities. The actual number of bytes skipped is returned. If
     * {@code n} is negative, no bytes are skipped.
     *
     *
     * @param n
     *            the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if a truncated tar archive is detected
     *                     or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }

        final long availableOfInputStream = inputStream.available();
        // never skip past the end of the current entry
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;

        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(inputStream, numToSkip);
            // for non-sparse entry, we should get the bytes actually skipped bytes along with
            // inputStream.available() if inputStream is instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }


        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry.
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        // padding only exists when the entry size is not a record multiple
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
            final long available = inputStream.available();
            final long numRecords = (this.entrySize / this.recordSize) + 1;
            final long padding = (numRecords * this.recordSize) - this.entrySize;
            long skipped = IOUtils.skip(inputStream, padding);

            skipped = getActuallySkipped(available, skipped, padding);

            count(skipped);
        }
    }

    /**
     * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip,
     * jump to the next input stream and skip the rest bytes, keep doing this until total n bytes are skipped
     * or the input streams are all skipped
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException on error
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.skip(n);
        }

        long bytesSkipped = 0;

        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);

            if (bytesSkipped < n) {
                // current member stream exhausted, move on to the next one
                currentSparseInputStreamIndex++;
            }
        }

        return bytesSkipped;
    }

    /**
     * Tries to read the next record rewinding the stream if it is not an EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected.
Actually this won't help since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - ten records either so we probably have already read
     * beyond the archive anyway.</p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = inputStream.markSupported();
        if (marked) {
            inputStream.mark(recordSize);
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                // not an EOF record - undo the speculative read so the
                // caller still sees the following record
                pushedBackBytes(recordSize);
                inputStream.reset();
            }
        }
    }
}