001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.DataInputStream; 024import java.io.EOFException; 025import java.io.File; 026import java.io.FilterInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.nio.ByteBuffer; 030import java.nio.ByteOrder; 031import java.nio.CharBuffer; 032import java.nio.channels.SeekableByteChannel; 033import java.nio.charset.StandardCharsets; 034import java.nio.charset.CharsetEncoder; 035import java.nio.file.Files; 036import java.nio.file.StandardOpenOption; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.BitSet; 040import java.util.EnumSet; 041import java.util.HashMap; 042import java.util.LinkedList; 043import java.util.List; 044import java.util.Map; 045import java.util.stream.Collectors; 046import java.util.zip.CRC32; 047 048import org.apache.commons.compress.MemoryLimitException; 049import org.apache.commons.compress.utils.BoundedInputStream; 050import org.apache.commons.compress.utils.ByteUtils; 
import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;

/**
 * Reads a 7z file, using SeekableByteChannel under
 * the covers.
 * <p>
 * The 7z file format is a flexible container
 * that can contain many compression and
 * encryption types, but at the moment
 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256
 * are supported.
 * <p>
 * The format is very Windows/Intel specific,
 * so it uses little-endian byte order,
 * doesn't store user/group or permission bits,
 * and represents times using NTFS timestamps
 * (100 nanosecond units since 1 January 1601).
 * Hence the official tools recommend against
 * using it for backup purposes on *nix, and
 * recommend .tar.7z or .tar.lzma or .tar.xz
 * instead.
 * <p>
 * Both the header and file contents may be
 * compressed and/or encrypted. With both
 * encrypted, neither file names nor file
 * contents can be read, but the use of
 * encryption isn't plausibly deniable.
*
 * <p>Multi volume archives can be read by concatenating the parts in
 * correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel}
 * for example.</p>
 *
 * @NotThreadSafe
 * @since 1.6
 */
public class SevenZFile implements Closeable {
    /**
     * Number of bytes in front of the archive data: the 12 bytes read by
     * {@code readHeaders} (signature, version, CRC) plus the 20 bytes read by
     * {@code readStartHeader}. Header and pack offsets are relative to this.
     */
    static final int SIGNATURE_HEADER_SIZE = 32;

    /** Archive name used by the constructors that are not given one. */
    private static final String DEFAULT_FILE_NAME = "unknown archive";

    /** Name of the archive, passed on to the decoders. */
    private final String fileName;
    /** Channel the archive is read from; set to {@code null} by {@link #close()}. */
    private SeekableByteChannel channel;
    /** Parsed archive meta-data, assigned once by the private constructor. */
    private final Archive archive;
    /** Index into {@code archive.files} of the current entry, -1 before the first {@code getNextEntry} call. */
    private int currentEntryIndex = -1;
    private int currentFolderIndex = -1;
    private InputStream currentFolderInputStream;
    /** Private copy of the password bytes; zeroed and dropped by {@link #close()}. */
    private byte[] password;
    private final SevenZFileOptions options;

    // both counters are reset to 0 by getNextEntry
    private long compressedBytesReadFromCurrentEntry;
    private long uncompressedBytesReadFromCurrentEntry;

    private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>();

    // shared with SevenZOutputFile and tests, neither mutates it
    static final byte[] sevenZSignature = { //NOSONAR
        (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
    };

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final File fileName, final char[] password) throws IOException {
        this(fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive with additional options.
*
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException {
        // closeOnError is true: the channel is opened here, so it must not leak if the headers cannot be read
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR
                fileName.getAbsolutePath(), utf16Decode(password), true, options);
    }

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final File fileName, final byte[] password) throws IOException {
        // closeOnError is true: the channel is opened here, so it must not leak if the headers cannot be read
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)),
                fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        // no password and no name: the archive is reported under the default name
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel,
                      final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options)
            throws IOException {
        // no name given: the archive is reported under the default name
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final char[] password) throws IOException {
        this(channel, fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password,
            final SevenZFileOptions options) throws IOException {
        // closeOnError is false: the channel belongs to the caller and stays open if reading fails.
        // NOTE(review): utf16Decode is defined outside this view; presumably it yields the UTF16-LE
        // byte form that the byte[] constructors document - confirm.
        this(channel, fileName, utf16Decode(password), false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName)
        throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options)
            throws IOException {
        // no password; closeOnError is false because the channel belongs to the caller
        this(channel, fileName, null, false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel,
                      final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
326 * @throws IOException if reading the archive fails 327 * @since 1.13 328 * @deprecated use the char[]-arg version for the password instead 329 */ 330 @Deprecated 331 public SevenZFile(final SeekableByteChannel channel, final String fileName, 332 final byte[] password) throws IOException { 333 this(channel, fileName, password, false, SevenZFileOptions.DEFAULT); 334 } 335 336 private SevenZFile(final SeekableByteChannel channel, final String filename, 337 final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException { 338 boolean succeeded = false; 339 this.channel = channel; 340 this.fileName = filename; 341 this.options = options; 342 try { 343 archive = readHeaders(password); 344 if (password != null) { 345 this.password = Arrays.copyOf(password, password.length); 346 } else { 347 this.password = null; 348 } 349 succeeded = true; 350 } finally { 351 if (!succeeded && closeOnError) { 352 this.channel.close(); 353 } 354 } 355 } 356 357 /** 358 * Reads a file as unencrypted 7z archive 359 * 360 * @param fileName the file to read 361 * @throws IOException if reading the archive fails 362 */ 363 public SevenZFile(final File fileName) throws IOException { 364 this(fileName, SevenZFileOptions.DEFAULT); 365 } 366 367 /** 368 * Reads a file as unencrypted 7z archive 369 * 370 * @param fileName the file to read 371 * @param options the options to apply 372 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 373 * @since 1.19 374 */ 375 public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException { 376 this(fileName, null, options); 377 } 378 379 /** 380 * Closes the archive. 
381 * @throws IOException if closing the file fails 382 */ 383 @Override 384 public void close() throws IOException { 385 if (channel != null) { 386 try { 387 channel.close(); 388 } finally { 389 channel = null; 390 if (password != null) { 391 Arrays.fill(password, (byte) 0); 392 } 393 password = null; 394 } 395 } 396 } 397 398 /** 399 * Returns the next Archive Entry in this archive. 400 * 401 * @return the next entry, 402 * or {@code null} if there are no more entries 403 * @throws IOException if the next entry could not be read 404 */ 405 public SevenZArchiveEntry getNextEntry() throws IOException { 406 if (currentEntryIndex >= archive.files.length - 1) { 407 return null; 408 } 409 ++currentEntryIndex; 410 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 411 if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { 412 entry.setName(getDefaultName()); 413 } 414 buildDecodingStream(currentEntryIndex, false); 415 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 416 return entry; 417 } 418 419 /** 420 * Returns a copy of meta-data of all archive entries. 421 * 422 * <p>This method only provides meta-data, the entries can not be 423 * used to read the contents, you still need to process all 424 * entries in order using {@link #getNextEntry} for that.</p> 425 * 426 * <p>The content methods are only available for entries that have 427 * already been reached via {@link #getNextEntry}.</p> 428 * 429 * @return a copy of meta-data of all archive entries. 
* @since 1.11
     */
    public Iterable<SevenZArchiveEntry> getEntries() {
        // new list, so callers cannot modify archive.files through it
        return new ArrayList<>(Arrays.asList(archive.files));
    }

    /**
     * Reads the signature, version and start header CRC from the channel and
     * builds the {@code Archive} from the header they lead to.
     *
     * <p>If the start header looks invalid (its CRC and the following 20
     * bytes are all zero), the end header is searched for instead, provided
     * the options enable recovery of broken archives.</p>
     *
     * @param password password bytes, may be {@code null}
     * @return the parsed archive meta-data
     * @throws IOException if the signature or version is wrong, or no usable header is found
     */
    private Archive readHeaders(final byte[] password) throws IOException {
        final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */)
            .order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        final byte[] signature = new byte[6];
        buf.get(signature);
        if (!Arrays.equals(signature, sevenZSignature)) {
            throw new IOException("Bad 7z signature");
        }
        // 7zFormat.txt has it wrong - it's first major then minor
        final byte archiveVersionMajor = buf.get();
        final byte archiveVersionMinor = buf.get();
        if (archiveVersionMajor != 0) {
            throw new IOException(String.format("Unsupported 7z version (%d,%d)",
                                                archiveVersionMajor, archiveVersionMinor));
        }

        boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
        // mask keeps the 32 bit CRC unsigned
        final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
        if (startHeaderCrc == 0) {
            // This is an indication of a corrupt header - peek the next 20 bytes
            final long currentPosition = channel.position();
            final ByteBuffer peekBuf = ByteBuffer.allocate(20);
            readFully(peekBuf);
            // undo the peek so readStartHeader sees the same 20 bytes
            channel.position(currentPosition);
            // Header invalid if all data is 0
            while (peekBuf.hasRemaining()) {
                if (peekBuf.get()!=0) {
                    headerLooksValid = true;
                    break;
                }
            }
        } else {
            headerLooksValid = true;
        }

        if (headerLooksValid) {
            final StartHeader startHeader = readStartHeader(startHeaderCrc);
            return initializeArchive(startHeader, password, true);
        }
        // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
        if (options.getTryToRecoverBrokenArchives()) {
            return tryToLocateEndHeader(password);
        }
        throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the"
            + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed"
            + " prematurely.");
    }

    /**
     * Searches backwards from the end of the channel (over at most 1 MiB)
     * for a byte that equals a header identifier and tries to parse an
     * archive header starting there.
     *
     * @param password password bytes, may be {@code null}
     * @return the first candidate that parses and contains packed streams and files
     * @throws IOException if no candidate position yields a usable archive
     */
    private Archive tryToLocateEndHeader(final byte[] password) throws IOException {
        final ByteBuffer nidBuf = ByteBuffer.allocate(1);
        final long searchLimit = 1024L * 1024 * 1;
        // Main header, plus bytes that readStartHeader would read
        final long previousDataSize = channel.position() + 20;
        final long minPos;
        // Determine minimal position - can't start before current position
        if (channel.position() + searchLimit > channel.size()) {
            minPos = channel.position();
        } else {
            minPos = channel.size() - searchLimit;
        }
        long pos = channel.size() - 1;
        // Loop: Try from end of archive
        while (pos > minPos) {
            pos--;
            channel.position(pos);
            nidBuf.rewind();
            if (channel.read(nidBuf) < 1) {
                throw new EOFException();
            }
            final int nid = nidBuf.array()[0];
            // First indicator: Byte equals one of these header identifiers
            if (nid == NID.kEncodedHeader || nid == NID.kHeader) {
                try {
                    // Try to initialize Archive structure from here
                    final StartHeader startHeader = new StartHeader();
                    // NOTE(review): this assumes previousDataSize equals SIGNATURE_HEADER_SIZE, so that
                    // initializeArchive seeks back to exactly pos - confirm
                    startHeader.nextHeaderOffset = pos - previousDataSize;
                    startHeader.nextHeaderSize = channel.size() - pos;
                    // no CRC is known for a guessed header, so verification is switched off
                    final Archive result = initializeArchive(startHeader, password, false);
                    // Sanity check: There must be some data...
                    if (result.packSizes.length > 0 && result.files.length > 0) {
                        return result;
                    }
                } catch (final Exception ignore) {
                    // Wrong guess...
                    // deliberately swallowed: a failed parse only means this position is not the header
                }
            }
        }
        throw new IOException("Start header corrupt and unable to guess end header");
    }

    /**
     * Reads the header described by {@code startHeader} from the channel and
     * parses it into a new {@code Archive}; an encoded header is decoded
     * first and then parsed.
     *
     * @param startHeader offset, size and CRC of the header to read
     * @param password password bytes, may be {@code null}
     * @param verifyCrc whether the header bytes are checked against {@code startHeader.nextHeaderCrc}
     * @return the parsed archive meta-data
     * @throws IOException if the CRC does not match or the header is broken or unsupported
     */
    private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException {
        assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize);
        final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
        channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
        ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        if (verifyCrc) {
            final CRC32 crc = new CRC32();
            crc.update(buf.array());
            if (startHeader.nextHeaderCrc != crc.getValue()) {
                throw new IOException("NextHeader CRC mismatch");
            }
        }

        Archive archive = new Archive();
        int nid = getUnsignedByte(buf);
        if (nid == NID.kEncodedHeader) {
            buf = readEncodedHeader(buf, archive, password);
            // Archive gets rebuilt with the new header
            archive = new Archive();
            nid = getUnsignedByte(buf);
        }
        if (nid != NID.kHeader) {
            throw new IOException("Broken or unsupported archive: no Header");
        }
        readHeader(buf, archive);
        archive.subStreamsInfo = null;
        return archive;
    }

    /**
     * Reads the 20 byte start header (offset, size and CRC of the next
     * header) from the channel's current position.
     *
     * @param startHeaderCrc CRC passed to the verifying stream the 20 bytes are read through
     * @return the start header
     * @throws IOException if offset or size point outside of the channel
     */
    private StartHeader readStartHeader(final long startHeaderCrc) throws IOException {
        final StartHeader startHeader = new StartHeader();
        // using Stream rather than ByteBuffer for the benefit of the
        // built-in CRC check
        try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream(
                new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) {
             // DataInputStream reads big-endian, the format is little-endian: swap the bytes
             startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
             if (startHeader.nextHeaderOffset < 0
                 || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) {
                 throw new IOException("nextHeaderOffset is out of bounds");
             }

             startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong());
             final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize;
             // first condition is true for a negative size and for an overflowing sum
             if (nextHeaderEnd < startHeader.nextHeaderOffset
                 || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) {
                 throw new IOException("nextHeaderSize is out of bounds");
             }

             startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt());

             return startHeader;
        }
    }

    /**
     * Parses the header into {@code archive}. The header is first run
     * through the sanity checks and the resulting statistics are validated
     * against the configured memory limit; only then is it read for real
     * from the same position.
     *
     * @param header buffer positioned directly behind the kHeader identifier
     * @param archive the archive to fill
     * @throws IOException if the header is invalid or uses unsupported features
     */
    private void readHeader(final ByteBuffer header, final Archive archive) throws IOException {
        final int pos = header.position();
        final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header);
        stats.assertValidity(options.getMaxMemoryLimitInKb());
        // rewind to where the sanity check started
        header.position(pos);

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            readArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
            //nid = getUnsignedByte(header);
        }

        if (nid == NID.kMainStreamsInfo) {
            readStreamsInfo(header, archive);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
            readFilesInfo(header, archive);
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Walks the header without building any structures, checking it section
     * by section and collecting the statistics of what it declares.
     *
     * @param header buffer positioned directly behind the kHeader identifier
     * @return the collected statistics
     * @throws IOException if a section is invalid or the header does not end with kEnd
     */
    private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header)
        throws IOException {
        final ArchiveStatistics stats = new ArchiveStatistics();

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            sanityCheckArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
            //nid = getUnsignedByte(header);
        }

        if (nid == NID.kMainStreamsInfo) {
            sanityCheckStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
            sanityCheckFilesInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated header, found " + nid);
        }

        return stats;
    }

    /**
     * Consumes the archive properties section up to its kEnd identifier; the
     * property values are read and discarded.
     *
     * @param input buffer positioned at the first property identifier
     * @throws IOException if reading from the buffer fails
     */
    private void readArchiveProperties(final ByteBuffer input) throws IOException {
        // FIXME: the reference implementation just throws them away?
        int nid = getUnsignedByte(input);
        while (nid != NID.kEnd) {
            final long propertySize = readUint64(input);
            // unchecked cast: the size was validated by sanityCheckArchiveProperties, which readHeader runs first
            final byte[] property = new byte[(int)propertySize];
            get(input, property);
            nid = getUnsignedByte(input);
        }
    }

    /**
     * Checks the archive properties section: every property size must fit
     * into a non-negative int and be fully skippable inside the buffer.
     *
     * @param header buffer positioned at the first property identifier
     * @throws IOException if a property size is invalid
     */
    private void sanityCheckArchiveProperties(final ByteBuffer header)
        throws IOException {
        int nid = getUnsignedByte(header);
        while (nid != NID.kEnd) {
            final int propertySize =
                assertFitsIntoNonNegativeInt("propertySize", readUint64(header));
            if (skipBytesFully(header, propertySize) < propertySize) {
                throw new IOException("invalid property size");
            }
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Decodes an encoded header: reads the streams info that describes it,
     * runs the first packed stream through the coders of the first folder and
     * returns the decoded bytes.
     *
     * @param header buffer positioned directly behind the kEncodedHeader identifier
     * @param archive scratch archive the streams info is read into
     * @param password password bytes handed to the decoders, may be {@code null}
     * @return little-endian buffer holding the decoded header
     * @throws IOException if the streams info is invalid, an unsupported coder
     *  is used or the decoded stream ends prematurely
     */
    private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive,
                                         final byte[] password) throws IOException {
        // same two-pass scheme as readHeader: check first, then rewind and read
        final int pos = header.position();
        ArchiveStatistics stats = new ArchiveStatistics();
        sanityCheckStreamsInfo(header, stats);
        stats.assertValidity(options.getMaxMemoryLimitInKb());
        header.position(pos);

        readStreamsInfo(header, archive);

        if (archive.folders == null || archive.folders.length == 0) {
            throw new IOException("no folders, can't read encoded header");
        }
        if (archive.packSizes == null || archive.packSizes.length == 0) {
            throw new IOException("no packed streams, can't read encoded header");
        }

        // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage?
        final Folder folder = archive.folders[0];
        final int firstPackStreamIndex = 0;
        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
                0;

        channel.position(folderOffset);
        InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel,
                archive.packSizes[firstPackStreamIndex]);
        // wrap the packed stream in one decoder per coder
        for (final Coder coder : folder.getOrderedCoders()) {
            if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
                throw new IOException("Multi input/output stream coders are not yet supported");
            }
            inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR
                    folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb());
        }
        if (folder.hasCrc) {
            inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack,
                    folder.getUnpackSize(), folder.crc);
        }
        final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize());
        final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize);
        if (nextHeader.length < unpackSize) {
            throw new IOException("premature end of stream");
        }
        // NOTE(review): the stream stack is not closed when one of the statements above throws
        inputStreamStack.close();
        return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN);
    }

    /**
     * Checks a streams info section (optional pack info, unpack info and
     * sub-streams info, terminated by kEnd) and records what it declares in
     * {@code stats}.
     *
     * @param header buffer positioned at the first identifier of the section
     * @param stats statistics to update
     * @throws IOException if a sub-section is invalid or the section is not terminated by kEnd
     */
    private void sanityCheckStreamsInfo(final ByteBuffer header,
        final ArchiveStatistics stats) throws IOException {
        int nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            sanityCheckPackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            sanityCheckUnpackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kSubStreamsInfo) {
            sanityCheckSubStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated StreamsInfo");
        }
    }

    /**
     * Reads a streams info section into {@code archive}. Unlike
     * {@link #sanityCheckStreamsInfo} it does not verify the terminating
     * identifier.
     *
     * @param header buffer positioned at the first identifier of the section
     * @param archive the archive to fill
     * @throws IOException if reading a sub-section fails
     */
    private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
        int nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            readPackInfo(header, archive);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            readUnpackInfo(header, archive);
            nid = getUnsignedByte(header);
        } else {
            // archive without unpack/coders info
            archive.folders = Folder.EMPTY_FOLDER_ARRAY;
        }

        if (nid == NID.kSubStreamsInfo) {
            readSubStreamsInfo(header, archive);
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Checks the pack info section: the position of the packed streams and
     * the accumulated pack sizes must stay inside the channel, and the CRC
     * list must be fully present.
     *
     * @param header buffer positioned directly behind the kPackInfo identifier
     * @param stats receives the number of packed streams
     * @throws IOException if a value is out of range or the section is not terminated by kEnd
     */
    private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {
        final long packPos = readUint64(header);
        // last condition catches overflow of the sum
        if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size()
            || SIGNATURE_HEADER_SIZE + packPos < 0) {
            throw new IOException("packPos (" + packPos + ") is out of range");
        }
        final long numPackStreams = readUint64(header);
        stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams);
        int nid = getUnsignedByte(header);
        if (nid == NID.kSize) {
            long totalPackSizes = 0;
            for (int i = 0; i < stats.numberOfPackedStreams; i++) {
                final long packSize = readUint64(header);
                totalPackSizes += packSize;
                final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes;
                // last condition catches overflow of the running total
                if (packSize < 0
                    || endOfPackStreams > channel.size()
                    || endOfPackStreams < packPos) {
                    throw new IOException("packSize (" + packSize + ") is out of range");
                }
            }
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kCRC) {
            final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams)
                .cardinality();
            // 4 bytes per defined CRC
            if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) {
                throw new IOException("invalid number of CRCs in PackInfo");
            }
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated PackInfo (" + nid + ")");
        }
    }

    /**
     * Reads the pack info section (position, sizes and CRCs of the packed
     * streams) into {@code archive}. Values are not validated here.
     *
     * @param header buffer positioned directly behind the kPackInfo identifier
     * @param archive the archive to fill
     * @throws IOException if reading from the buffer fails
     */
    private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException {
        archive.packPos = readUint64(header);
        final int numPackStreamsInt = (int) readUint64(header);
        int nid = getUnsignedByte(header);
        if (nid == NID.kSize) {
            archive.packSizes = new long[numPackStreamsInt];
            for (int i = 0; i < archive.packSizes.length; i++) {
                archive.packSizes[i] = readUint64(header);
            }
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kCRC) {
            archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt);
            archive.packCrcs = new long[numPackStreamsInt];
            for (int i = 0; i < numPackStreamsInt; i++) {
                // a CRC value is only stored for streams whose bit is set
                if (archive.packCrcsDefined.get(i)) {
                    archive.packCrcs[i] = 0xffffFFFFL & getInt(header);
                }
            }

            nid = getUnsignedByte(header);
        }
    }

    /**
     * Checks the unpack info section (folders, their unpack sizes and CRCs)
     * and records the number of folders and the folder CRC bits in
     * {@code stats}.
     *
     * @param header buffer positioned directly behind the kUnpackInfo identifier
     * @param stats statistics to update
     * @throws IOException if the section is malformed or uses external data
     * @throws IllegalArgumentException if an unpack size is negative
     */
    private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats)
        throws IOException {
        int nid = getUnsignedByte(header);
        if (nid != NID.kFolder) {
            throw new IOException("Expected kFolder, got " + nid);
        }
        final long numFolders = readUint64(header);
        stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders);
        final int external = getUnsignedByte(header);
        if (external != 0) {
            throw new IOException("External unsupported");
        }

        final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>();
        for (int i = 0; i < stats.numberOfFolders; i++) {
            numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats));
        }

        final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders;
        final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs;
        // NOTE(review): this rejects archives that declare MORE packed streams than the folders need,
        // while the message talks about too few - confirm the intended direction of the comparison
        if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) {
            throw new IOException("archive doesn't contain enough packed streams");
        }

        nid = getUnsignedByte(header);
        if (nid != NID.kCodersUnpackSize) {
            throw new IOException("Expected kCodersUnpackSize, got " + nid);
        }

        for (int numberOfOutputStreams : numberOfOutputStreamsPerFolder) {
            for (int i = 0; i < numberOfOutputStreams; i++) {
                final long unpackSize = readUint64(header);
                if (unpackSize < 0) {
                    // NOTE(review): the other sanity checks report broken data with IOException;
                    // this is the only place that throws an unchecked exception for it
                    throw new IllegalArgumentException("negative unpackSize");
                }
            }
        }

        nid = getUnsignedByte(header);
        if (nid == NID.kCRC) {
            stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders);
            final int crcsDefined = stats.folderHasCrc.cardinality();
            // 4 bytes per defined CRC
            if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) {
                throw new IOException("invalid number of CRCs in UnpackInfo");
            }
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated UnpackInfo");
        }
    }

    /**
     * Reads the unpack info section (folders, their unpack sizes and CRCs)
     * into {@code archive.folders}. Identifiers are consumed but not checked
     * here.
     *
     * @param header buffer positioned directly behind the kUnpackInfo identifier
     * @param archive the archive to fill
     * @throws IOException if reading a folder fails
     */
    private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException {
        int nid = getUnsignedByte(header);
        final int numFoldersInt = (int) readUint64(header);
        final Folder[] folders = new Folder[numFoldersInt];
        archive.folders = folders;
        /* final int external = */ getUnsignedByte(header);
        for (int i = 0; i < numFoldersInt; i++) {
            folders[i] = readFolder(header);
        }

        nid = getUnsignedByte(header);
        for (final Folder folder : folders) {
            assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams);
            folder.unpackSizes = new long[(int)folder.totalOutputStreams];
            for (int i = 0; i < folder.totalOutputStreams; i++) {
                folder.unpackSizes[i] = readUint64(header);
            }
        }

        nid = getUnsignedByte(header);
        if (nid == NID.kCRC) {
            final BitSet crcsDefined = readAllOrBits(header, numFoldersInt);
            for (int i = 0; i < numFoldersInt; i++) {
                if (crcsDefined.get(i)) {
                    folders[i].hasCrc = true;
                    folders[i].crc = 0xffffFFFFL & getInt(header);
                } else {
                    folders[i].hasCrc = false;
                }
            }

            nid = getUnsignedByte(header);
} 909 } 910 911 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 912 913 int nid = getUnsignedByte(header); 914 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 915 if (nid == NID.kNumUnpackStream) { 916 for (int i = 0; i < stats.numberOfFolders; i++) { 917 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 918 } 919 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().collect(Collectors.summingLong(Integer::longValue)); 920 nid = getUnsignedByte(header); 921 } else { 922 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 923 } 924 925 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 926 927 if (nid == NID.kSize) { 928 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 929 if (numUnpackSubStreams == 0) { 930 continue; 931 } 932 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 933 final long size = readUint64(header); 934 if (size < 0) { 935 throw new IOException("negative unpackSize"); 936 } 937 } 938 } 939 nid = getUnsignedByte(header); 940 } 941 942 int numDigests = 0; 943 if (numUnpackSubStreamsPerFolder.isEmpty()) { 944 numDigests = stats.folderHasCrc == null ? 
stats.numberOfFolders 945 : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 946 } else { 947 int folderIdx = 0; 948 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 949 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null 950 || !stats.folderHasCrc.get(folderIdx++)) { 951 numDigests += numUnpackSubStreams; 952 } 953 } 954 } 955 956 if (nid == NID.kCRC) { 957 assertFitsIntoNonNegativeInt("numDigests", numDigests); 958 final int missingCrcs = readAllOrBits(header, numDigests) 959 .cardinality(); 960 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 961 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 962 } 963 nid = getUnsignedByte(header); 964 } 965 966 if (nid != NID.kEnd) { 967 throw new IOException("Badly terminated SubStreamsInfo"); 968 } 969 } 970 971 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 972 for (final Folder folder : archive.folders) { 973 folder.numUnpackSubStreams = 1; 974 } 975 long unpackStreamsCount = archive.folders.length; 976 977 int nid = getUnsignedByte(header); 978 if (nid == NID.kNumUnpackStream) { 979 unpackStreamsCount = 0; 980 for (final Folder folder : archive.folders) { 981 final long numStreams = readUint64(header); 982 folder.numUnpackSubStreams = (int)numStreams; 983 unpackStreamsCount += numStreams; 984 } 985 nid = getUnsignedByte(header); 986 } 987 988 final int totalUnpackStreams = (int) unpackStreamsCount; 989 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 990 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 991 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 992 subStreamsInfo.crcs = new long[totalUnpackStreams]; 993 994 int nextUnpackStream = 0; 995 for (final Folder folder : archive.folders) { 996 if (folder.numUnpackSubStreams == 0) { 997 continue; 998 } 999 long sum = 0; 1000 if (nid == NID.kSize) { 1001 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 
1002 final long size = readUint64(header); 1003 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1004 sum += size; 1005 } 1006 } 1007 if (sum > folder.getUnpackSize()) { 1008 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1009 } 1010 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1011 } 1012 if (nid == NID.kSize) { 1013 nid = getUnsignedByte(header); 1014 } 1015 1016 int numDigests = 0; 1017 for (final Folder folder : archive.folders) { 1018 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1019 numDigests += folder.numUnpackSubStreams; 1020 } 1021 } 1022 1023 if (nid == NID.kCRC) { 1024 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1025 final long[] missingCrcs = new long[numDigests]; 1026 for (int i = 0; i < numDigests; i++) { 1027 if (hasMissingCrc.get(i)) { 1028 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1029 } 1030 } 1031 int nextCrc = 0; 1032 int nextMissingCrc = 0; 1033 for (final Folder folder: archive.folders) { 1034 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1035 subStreamsInfo.hasCrc.set(nextCrc, true); 1036 subStreamsInfo.crcs[nextCrc] = folder.crc; 1037 ++nextCrc; 1038 } else { 1039 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1040 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1041 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1042 ++nextCrc; 1043 ++nextMissingCrc; 1044 } 1045 } 1046 } 1047 1048 nid = getUnsignedByte(header); 1049 } 1050 1051 archive.subStreamsInfo = subStreamsInfo; 1052 } 1053 1054 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) 1055 throws IOException { 1056 1057 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1058 if (numCoders == 0) { 1059 throw new IOException("Folder without coders"); 1060 } 1061 stats.numberOfCoders += numCoders; 1062 1063 long totalOutStreams = 0; 1064 long totalInStreams = 0; 
1065 for (int i = 0; i < numCoders; i++) { 1066 final int bits = getUnsignedByte(header); 1067 final int idSize = bits & 0xf; 1068 get(header, new byte[idSize]); 1069 1070 final boolean isSimple = (bits & 0x10) == 0; 1071 final boolean hasAttributes = (bits & 0x20) != 0; 1072 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1073 if (moreAlternativeMethods) { 1074 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1075 "The reference implementation doesn't support them either."); 1076 } 1077 1078 if (isSimple) { 1079 totalInStreams++; 1080 totalOutStreams++; 1081 } else { 1082 totalInStreams += 1083 assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1084 totalOutStreams += 1085 assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1086 } 1087 1088 if (hasAttributes) { 1089 final int propertiesSize = 1090 assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1091 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1092 throw new IOException("invalid propertiesSize in folder"); 1093 } 1094 } 1095 } 1096 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1097 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1098 stats.numberOfOutStreams += totalOutStreams; 1099 stats.numberOfInStreams += totalInStreams; 1100 1101 if (totalOutStreams == 0) { 1102 throw new IOException("Total output streams can't be 0"); 1103 } 1104 1105 final int numBindPairs = 1106 assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1107 if (totalInStreams < numBindPairs) { 1108 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1109 } 1110 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1111 for (int i = 0; i < numBindPairs; i++) { 1112 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1113 if (totalInStreams <= inIndex) { 1114 throw new IOException("inIndex is bigger than 
number of inStreams"); 1115 } 1116 inStreamsBound.set(inIndex); 1117 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1118 if (totalOutStreams <= outIndex) { 1119 throw new IOException("outIndex is bigger than number of outStreams"); 1120 } 1121 } 1122 1123 final int numPackedStreams = 1124 assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1125 1126 if (numPackedStreams == 1) { 1127 if (inStreamsBound.nextClearBit(0) == -1) { 1128 throw new IOException("Couldn't find stream's bind pair index"); 1129 } 1130 } else { 1131 for (int i = 0; i < numPackedStreams; i++) { 1132 final int packedStreamIndex = 1133 assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1134 if (packedStreamIndex >= totalInStreams) { 1135 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1136 } 1137 } 1138 } 1139 1140 return (int) totalOutStreams; 1141 } 1142 1143 private Folder readFolder(final ByteBuffer header) throws IOException { 1144 final Folder folder = new Folder(); 1145 1146 final long numCoders = readUint64(header); 1147 final Coder[] coders = new Coder[(int)numCoders]; 1148 long totalInStreams = 0; 1149 long totalOutStreams = 0; 1150 for (int i = 0; i < coders.length; i++) { 1151 coders[i] = new Coder(); 1152 final int bits = getUnsignedByte(header); 1153 final int idSize = bits & 0xf; 1154 final boolean isSimple = (bits & 0x10) == 0; 1155 final boolean hasAttributes = (bits & 0x20) != 0; 1156 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1157 1158 coders[i].decompressionMethodId = new byte[idSize]; 1159 get(header, coders[i].decompressionMethodId); 1160 if (isSimple) { 1161 coders[i].numInStreams = 1; 1162 coders[i].numOutStreams = 1; 1163 } else { 1164 coders[i].numInStreams = readUint64(header); 1165 coders[i].numOutStreams = readUint64(header); 1166 } 1167 totalInStreams += coders[i].numInStreams; 1168 totalOutStreams += coders[i].numOutStreams; 1169 if 
(hasAttributes) { 1170 final long propertiesSize = readUint64(header); 1171 coders[i].properties = new byte[(int)propertiesSize]; 1172 get(header, coders[i].properties); 1173 } 1174 // would need to keep looping as above: 1175 while (moreAlternativeMethods) { 1176 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1177 "The reference implementation doesn't support them either."); 1178 } 1179 } 1180 folder.coders = coders; 1181 folder.totalInputStreams = totalInStreams; 1182 folder.totalOutputStreams = totalOutStreams; 1183 1184 final long numBindPairs = totalOutStreams - 1; 1185 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 1186 for (int i = 0; i < bindPairs.length; i++) { 1187 bindPairs[i] = new BindPair(); 1188 bindPairs[i].inIndex = readUint64(header); 1189 bindPairs[i].outIndex = readUint64(header); 1190 } 1191 folder.bindPairs = bindPairs; 1192 1193 final long numPackedStreams = totalInStreams - numBindPairs; 1194 final long[] packedStreams = new long[(int)numPackedStreams]; 1195 if (numPackedStreams == 1) { 1196 int i; 1197 for (i = 0; i < (int)totalInStreams; i++) { 1198 if (folder.findBindPairForInStream(i) < 0) { 1199 break; 1200 } 1201 } 1202 packedStreams[0] = i; 1203 } else { 1204 for (int i = 0; i < (int)numPackedStreams; i++) { 1205 packedStreams[i] = readUint64(header); 1206 } 1207 } 1208 folder.packedStreams = packedStreams; 1209 1210 return folder; 1211 } 1212 1213 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1214 final int areAllDefined = getUnsignedByte(header); 1215 final BitSet bits; 1216 if (areAllDefined != 0) { 1217 bits = new BitSet(size); 1218 for (int i = 0; i < size; i++) { 1219 bits.set(i, true); 1220 } 1221 } else { 1222 bits = readBits(header, size); 1223 } 1224 return bits; 1225 } 1226 1227 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1228 final BitSet bits = new BitSet(size); 1229 int mask = 
0; 1230 int cache = 0; 1231 for (int i = 0; i < size; i++) { 1232 if (mask == 0) { 1233 mask = 0x80; 1234 cache = getUnsignedByte(header); 1235 } 1236 bits.set(i, (cache & mask) != 0); 1237 mask >>>= 1; 1238 } 1239 return bits; 1240 } 1241 1242 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1243 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1244 1245 int emptyStreams = -1; 1246 while (true) { 1247 final int propertyType = getUnsignedByte(header); 1248 if (propertyType == 0) { 1249 break; 1250 } 1251 final long size = readUint64(header); 1252 switch (propertyType) { 1253 case NID.kEmptyStream: { 1254 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1255 break; 1256 } 1257 case NID.kEmptyFile: { 1258 if (emptyStreams == -1) { 1259 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1260 } 1261 readBits(header, emptyStreams); 1262 break; 1263 } 1264 case NID.kAnti: { 1265 if (emptyStreams == -1) { 1266 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1267 } 1268 readBits(header, emptyStreams); 1269 break; 1270 } 1271 case NID.kName: { 1272 final int external = getUnsignedByte(header); 1273 if (external != 0) { 1274 throw new IOException("Not implemented"); 1275 } 1276 final int namesLength = 1277 assertFitsIntoNonNegativeInt("file names length", size - 1); 1278 if ((namesLength & 1) != 0) { 1279 throw new IOException("File names length invalid"); 1280 } 1281 1282 int filesSeen = 0; 1283 for (int i = 0; i < namesLength; i += 2) { 1284 final char c = getChar(header); 1285 if (c == 0) { 1286 filesSeen++; 1287 } 1288 } 1289 if (filesSeen != stats.numberOfEntries) { 1290 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " 1291 + stats.numberOfEntries + ")"); 1292 } 1293 break; 1294 } 1295 case NID.kCTime: { 1296 final int timesDefined = 
readAllOrBits(header, stats.numberOfEntries) 1297 .cardinality(); 1298 final int external = getUnsignedByte(header); 1299 if (external != 0) { 1300 throw new IOException("Not implemented"); 1301 } 1302 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1303 throw new IOException("invalid creation dates size"); 1304 } 1305 break; 1306 } 1307 case NID.kATime: { 1308 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1309 .cardinality(); 1310 final int external = getUnsignedByte(header); 1311 if (external != 0) { 1312 throw new IOException("Not implemented"); 1313 } 1314 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1315 throw new IOException("invalid access dates size"); 1316 } 1317 break; 1318 } 1319 case NID.kMTime: { 1320 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1321 .cardinality(); 1322 final int external = getUnsignedByte(header); 1323 if (external != 0) { 1324 throw new IOException("Not implemented"); 1325 } 1326 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1327 throw new IOException("invalid modification dates size"); 1328 } 1329 break; 1330 } 1331 case NID.kWinAttributes: { 1332 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) 1333 .cardinality(); 1334 final int external = getUnsignedByte(header); 1335 if (external != 0) { 1336 throw new IOException("Not implemented"); 1337 } 1338 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1339 throw new IOException("invalid windows attributes size"); 1340 } 1341 break; 1342 } 1343 case NID.kStartPos: { 1344 throw new IOException("kStartPos is unsupported, please report"); 1345 } 1346 case NID.kDummy: { 1347 // 7z 9.20 asserts the content is all zeros and ignores the property 1348 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1349 1350 if (skipBytesFully(header, size) < size) { 1351 throw new IOException("Incomplete kDummy 
property"); 1352 } 1353 break; 1354 } 1355 1356 default: { 1357 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1358 if (skipBytesFully(header, size) < size) { 1359 throw new IOException("Incomplete property of type " + propertyType); 1360 } 1361 break; 1362 } 1363 } 1364 } 1365 stats.numberOfEntriesWithStream = stats.numberOfEntries - (emptyStreams > 0 ? emptyStreams : 0); 1366 } 1367 1368 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1369 final int numFilesInt = (int) readUint64(header);; 1370 final Map<Integer, SevenZArchiveEntry> fileMap = new HashMap<>(); 1371 BitSet isEmptyStream = null; 1372 BitSet isEmptyFile = null; 1373 BitSet isAnti = null; 1374 while (true) { 1375 final int propertyType = getUnsignedByte(header); 1376 if (propertyType == 0) { 1377 break; 1378 } 1379 final long size = readUint64(header); 1380 switch (propertyType) { 1381 case NID.kEmptyStream: { 1382 isEmptyStream = readBits(header, numFilesInt); 1383 break; 1384 } 1385 case NID.kEmptyFile: { 1386 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1387 break; 1388 } 1389 case NID.kAnti: { 1390 isAnti = readBits(header, isEmptyStream.cardinality()); 1391 break; 1392 } 1393 case NID.kName: { 1394 /* final int external = */ getUnsignedByte(header); 1395 final byte[] names = new byte[(int) (size - 1)]; 1396 final int namesLength = names.length; 1397 get(header, names); 1398 int nextFile = 0; 1399 int nextName = 0; 1400 for (int i = 0; i < namesLength; i += 2) { 1401 if (names[i] == 0 && names[i + 1] == 0) { 1402 checkEntryIsInitialized(fileMap, nextFile); 1403 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, StandardCharsets.UTF_16LE)); 1404 nextName = i + 2; 1405 nextFile++; 1406 } 1407 } 1408 if (nextName != namesLength || nextFile != numFilesInt) { 1409 throw new IOException("Error parsing file names"); 1410 } 1411 break; 1412 } 1413 case NID.kCTime: { 1414 final BitSet 
timesDefined = readAllOrBits(header, numFilesInt); 1415 /* final int external = */ getUnsignedByte(header); 1416 for (int i = 0; i < numFilesInt; i++) { 1417 checkEntryIsInitialized(fileMap, i); 1418 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1419 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1420 if (entryAtIndex.getHasCreationDate()) { 1421 entryAtIndex.setCreationDate(getLong(header)); 1422 } 1423 } 1424 break; 1425 } 1426 case NID.kATime: { 1427 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1428 /* final int external = */ getUnsignedByte(header); 1429 for (int i = 0; i < numFilesInt; i++) { 1430 checkEntryIsInitialized(fileMap, i); 1431 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1432 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1433 if (entryAtIndex.getHasAccessDate()) { 1434 entryAtIndex.setAccessDate(getLong(header)); 1435 } 1436 } 1437 break; 1438 } 1439 case NID.kMTime: { 1440 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1441 /* final int external = */ getUnsignedByte(header); 1442 for (int i = 0; i < numFilesInt; i++) { 1443 checkEntryIsInitialized(fileMap, i); 1444 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1445 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1446 if (entryAtIndex.getHasLastModifiedDate()) { 1447 entryAtIndex.setLastModifiedDate(getLong(header)); 1448 } 1449 } 1450 break; 1451 } 1452 case NID.kWinAttributes: { 1453 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1454 /* final int external = */ getUnsignedByte(header); 1455 for (int i = 0; i < numFilesInt; i++) { 1456 checkEntryIsInitialized(fileMap, i); 1457 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1458 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1459 if (entryAtIndex.getHasWindowsAttributes()) { 1460 entryAtIndex.setWindowsAttributes(getInt(header)); 1461 } 1462 } 1463 break; 1464 } 1465 case NID.kDummy: { 1466 // 7z 9.20 asserts the 
content is all zeros and ignores the property 1467 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1468 1469 skipBytesFully(header, size); 1470 break; 1471 } 1472 1473 default: { 1474 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1475 skipBytesFully(header, size); 1476 break; 1477 } 1478 } 1479 } 1480 int nonEmptyFileCounter = 0; 1481 int emptyFileCounter = 0; 1482 for (int i = 0; i < numFilesInt; i++) { 1483 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1484 if (entryAtIndex == null) { 1485 continue; 1486 } 1487 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1488 if (entryAtIndex.hasStream()) { 1489 if (archive.subStreamsInfo == null) { 1490 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1491 } 1492 entryAtIndex.setDirectory(false); 1493 entryAtIndex.setAntiItem(false); 1494 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1495 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1496 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1497 if (entryAtIndex.getSize() < 0) { 1498 throw new IOException("broken archive, entry with negative size"); 1499 } 1500 ++nonEmptyFileCounter; 1501 } else { 1502 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1503 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1504 entryAtIndex.setHasCrc(false); 1505 entryAtIndex.setSize(0); 1506 ++emptyFileCounter; 1507 } 1508 } 1509 final List<SevenZArchiveEntry> entries = new ArrayList<>(); 1510 for (final SevenZArchiveEntry e : fileMap.values()) { 1511 if (e != null) { 1512 entries.add(e); 1513 } 1514 } 1515 archive.files = entries.toArray(SevenZArchiveEntry.EMPTY_SEVEN_Z_ARCHIVE_ENTRY_ARRAY); 1516 calculateStreamMap(archive); 1517 } 1518 1519 private void checkEntryIsInitialized(final Map<Integer, 
SevenZArchiveEntry> archiveEntries, final int index) { 1520 if (archiveEntries.get(index) == null) { 1521 archiveEntries.put(index, new SevenZArchiveEntry()); 1522 } 1523 } 1524 1525 private void calculateStreamMap(final Archive archive) throws IOException { 1526 final StreamMap streamMap = new StreamMap(); 1527 1528 int nextFolderPackStreamIndex = 0; 1529 final int numFolders = archive.folders != null ? archive.folders.length : 0; 1530 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 1531 for (int i = 0; i < numFolders; i++) { 1532 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 1533 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 1534 } 1535 1536 long nextPackStreamOffset = 0; 1537 final int numPackSizes = archive.packSizes.length; 1538 streamMap.packStreamOffsets = new long[numPackSizes]; 1539 for (int i = 0; i < numPackSizes; i++) { 1540 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 1541 nextPackStreamOffset += archive.packSizes[i]; 1542 } 1543 1544 streamMap.folderFirstFileIndex = new int[numFolders]; 1545 streamMap.fileFolderIndex = new int[archive.files.length]; 1546 int nextFolderIndex = 0; 1547 int nextFolderUnpackStreamIndex = 0; 1548 for (int i = 0; i < archive.files.length; i++) { 1549 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 1550 streamMap.fileFolderIndex[i] = -1; 1551 continue; 1552 } 1553 if (nextFolderUnpackStreamIndex == 0) { 1554 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 1555 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 1556 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 1557 break; 1558 } 1559 } 1560 if (nextFolderIndex >= archive.folders.length) { 1561 throw new IOException("Too few folders in archive"); 1562 } 1563 } 1564 streamMap.fileFolderIndex[i] = nextFolderIndex; 1565 if (!archive.files[i].hasStream()) { 1566 continue; 1567 } 1568 ++nextFolderUnpackStreamIndex; 1569 if 
(nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 1570 ++nextFolderIndex; 1571 nextFolderUnpackStreamIndex = 0; 1572 } 1573 } 1574 1575 archive.streamMap = streamMap; 1576 } 1577 1578 /** 1579 * Build the decoding stream for the entry to be read. 1580 * This method may be called from a random access(getInputStream) or 1581 * sequential access(getNextEntry). 1582 * If this method is called from a random access, some entries may 1583 * need to be skipped(we put them to the deferredBlockStreams and 1584 * skip them when actually needed to improve the performance) 1585 * 1586 * @param entryIndex the index of the entry to be read 1587 * @param isRandomAccess is this called in a random access 1588 * @throws IOException if there are exceptions when reading the file 1589 */ 1590 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 1591 if (archive.streamMap == null) { 1592 throw new IOException("Archive doesn't contain stream information to read entries"); 1593 } 1594 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1595 if (folderIndex < 0) { 1596 deferredBlockStreams.clear(); 1597 // TODO: previously it'd return an empty stream? 1598 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 1599 return; 1600 } 1601 final SevenZArchiveEntry file = archive.files[entryIndex]; 1602 boolean isInSameFolder = false; 1603 if (currentFolderIndex == folderIndex) { 1604 // (COMPRESS-320). 1605 // The current entry is within the same (potentially opened) folder. The 1606 // previous stream has to be fully decoded before we can start reading 1607 // but don't do it eagerly -- if the user skips over the entire folder nothing 1608 // is effectively decompressed. 
1609 if (entryIndex > 0) { 1610 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 1611 } 1612 1613 // if this is called in a random access, then the content methods of previous entry may be null 1614 // the content methods should be set to methods of the first entry as it must not be null, 1615 // and the content methods would only be set if the content methods was not set 1616 if(isRandomAccess && file.getContentMethods() == null) { 1617 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 1618 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 1619 file.setContentMethods(folderFirstFile.getContentMethods()); 1620 } 1621 isInSameFolder = true; 1622 } else { 1623 currentFolderIndex = folderIndex; 1624 // We're opening a new folder. Discard any queued streams/ folder stream. 1625 reopenFolderInputStream(folderIndex, file); 1626 } 1627 1628 boolean haveSkippedEntries = false; 1629 if (isRandomAccess) { 1630 // entries will only need to be skipped if it's a random access 1631 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 1632 } 1633 1634 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 1635 // we don't need to add another entry to the deferredBlockStreams when : 1636 // 1. If this method is called in a random access and the entry index 1637 // to be read equals to the current entry index, the input stream 1638 // has already been put in the deferredBlockStreams 1639 // 2. 
If this entry has not been read(which means no entries are skipped) 1640 return; 1641 } 1642 1643 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 1644 if (file.getHasCrc()) { 1645 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 1646 } 1647 1648 deferredBlockStreams.add(fileStream); 1649 } 1650 1651 /** 1652 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1653 * 1654 * @param folderIndex the index of the folder to reopen 1655 * @param file the 7z entry to read 1656 * @throws IOException if exceptions occur when reading the 7z file 1657 */ 1658 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1659 deferredBlockStreams.clear(); 1660 if (currentFolderInputStream != null) { 1661 currentFolderInputStream.close(); 1662 currentFolderInputStream = null; 1663 } 1664 final Folder folder = archive.folders[folderIndex]; 1665 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1666 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1667 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1668 1669 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1670 } 1671 1672 /** 1673 * Skip all the entries if needed. 1674 * Entries need to be skipped when: 1675 * <p> 1676 * 1. it's a random access 1677 * 2. 
one of these 2 condition is meet : 1678 * <p> 1679 * 2.1 currentEntryIndex != entryIndex : this means there are some entries 1680 * to be skipped(currentEntryIndex < entryIndex) or the entry has already 1681 * been read(currentEntryIndex > entryIndex) 1682 * <p> 1683 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: 1684 * if the entry to be read is the current entry, but some data of it has 1685 * been read before, then we need to reopen the stream of the folder and 1686 * skip all the entries before the current entries 1687 * 1688 * @param entryIndex the entry to be read 1689 * @param isInSameFolder are the entry to be read and the current entry in the same folder 1690 * @param folderIndex the index of the folder which contains the entry 1691 * @return true if there are entries actually skipped 1692 * @throws IOException there are exceptions when skipping entries 1693 * @since 1.21 1694 */ 1695 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 1696 final SevenZArchiveEntry file = archive.files[entryIndex]; 1697 // if the entry to be read is the current entry, and the entry has not 1698 // been read yet, then there's nothing we need to do 1699 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 1700 return false; 1701 } 1702 1703 // 1. if currentEntryIndex < entryIndex : 1704 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 1705 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 1706 // this means the entry has already been read before, and we need to reopen the 1707 // stream of the folder and skip all the entries before the current entries 1708 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 1709 if (isInSameFolder) { 1710 if (currentEntryIndex < entryIndex) { 1711 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 1712 filesToSkipStartIndex = currentEntryIndex + 1; 1713 } else { 1714 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 1715 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 1716 // from the start entry of the folder again 1717 reopenFolderInputStream(folderIndex, file); 1718 } 1719 } 1720 1721 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 1722 final SevenZArchiveEntry fileToSkip = archive.files[i]; 1723 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 1724 if (fileToSkip.getHasCrc()) { 1725 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 1726 } 1727 deferredBlockStreams.add(fileStreamToSkip); 1728 1729 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 1730 fileToSkip.setContentMethods(file.getContentMethods()); 1731 } 1732 return true; 1733 } 1734 1735 /** 1736 * Find out if any data of current entry has been read or not. 1737 * This is achieved by comparing the bytes remaining to read 1738 * and the size of the file. 
1739 * 1740 * @return true if any data of current entry has been read 1741 * @since 1.21 1742 */ 1743 private boolean hasCurrentEntryBeenRead() { 1744 boolean hasCurrentEntryBeenRead = false; 1745 if (!deferredBlockStreams.isEmpty()) { 1746 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1747 // get the bytes remaining to read, and compare it with the size of 1748 // the file to figure out if the file has been read 1749 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 1750 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1751 } 1752 1753 if (currentEntryInputStream instanceof BoundedInputStream) { 1754 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1755 } 1756 } 1757 return hasCurrentEntryBeenRead; 1758 } 1759 1760 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 1761 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 1762 channel.position(folderOffset); 1763 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 1764 new BoundedSeekableByteChannelInputStream(channel, 1765 archive.packSizes[firstPackStreamIndex]))) { 1766 @Override 1767 public int read() throws IOException { 1768 final int r = in.read(); 1769 if (r >= 0) { 1770 count(1); 1771 } 1772 return r; 1773 } 1774 @Override 1775 public int read(final byte[] b) throws IOException { 1776 return read(b, 0, b.length); 1777 } 1778 @Override 1779 public int read(final byte[] b, final int off, final int len) throws IOException { 1780 if (len == 0) { 1781 return 0; 1782 } 1783 final int r = in.read(b, off, len); 1784 if (r >= 0) { 1785 count(r); 1786 } 1787 return r; 1788 } 1789 private void count(final int c) { 1790 compressedBytesReadFromCurrentEntry += c; 1791 } 
1792 }; 1793 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 1794 for (final Coder coder : folder.getOrderedCoders()) { 1795 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1796 throw new IOException("Multi input/output stream coders are not yet supported"); 1797 } 1798 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 1799 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 1800 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 1801 methods.addFirst(new SevenZMethodConfiguration(method, 1802 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 1803 } 1804 entry.setContentMethods(methods); 1805 if (folder.hasCrc) { 1806 return new CRC32VerifyingInputStream(inputStreamStack, 1807 folder.getUnpackSize(), folder.crc); 1808 } 1809 return inputStreamStack; 1810 } 1811 1812 /** 1813 * Reads a byte of data. 1814 * 1815 * @return the byte read, or -1 if end of input is reached 1816 * @throws IOException 1817 * if an I/O error has occurred 1818 */ 1819 public int read() throws IOException { 1820 final int b = getCurrentStream().read(); 1821 if (b >= 0) { 1822 uncompressedBytesReadFromCurrentEntry++; 1823 } 1824 return b; 1825 } 1826 1827 private InputStream getCurrentStream() throws IOException { 1828 if (archive.files[currentEntryIndex].getSize() == 0) { 1829 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 1830 } 1831 if (deferredBlockStreams.isEmpty()) { 1832 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 1833 } 1834 1835 while (deferredBlockStreams.size() > 1) { 1836 // In solid compression mode we need to decompress all leading folder' 1837 // streams to get access to an entry. We defer this until really needed 1838 // so that entire blocks can be skipped without wasting time for decompression. 
1839 try (final InputStream stream = deferredBlockStreams.remove(0)) { 1840 IOUtils.skip(stream, Long.MAX_VALUE); 1841 } 1842 compressedBytesReadFromCurrentEntry = 0; 1843 } 1844 1845 return deferredBlockStreams.get(0); 1846 } 1847 1848 /** 1849 * Returns an InputStream for reading the contents of the given entry. 1850 * 1851 * <p>For archives using solid compression randomly accessing 1852 * entries will be significantly slower than reading the archive 1853 * sequentially.</p> 1854 * 1855 * @param entry the entry to get the stream for. 1856 * @return a stream to read the entry from. 1857 * @throws IOException if unable to create an input stream from the zipentry 1858 * @since Compress 1.20 1859 */ 1860 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 1861 int entryIndex = -1; 1862 for (int i = 0; i < this.archive.files.length;i++) { 1863 if (entry == this.archive.files[i]) { 1864 entryIndex = i; 1865 break; 1866 } 1867 } 1868 1869 if (entryIndex < 0) { 1870 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 1871 } 1872 1873 buildDecodingStream(entryIndex, true); 1874 currentEntryIndex = entryIndex; 1875 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1876 return getCurrentStream(); 1877 } 1878 1879 /** 1880 * Reads data into an array of bytes. 1881 * 1882 * @param b the array to write data to 1883 * @return the number of bytes read, or -1 if end of input is reached 1884 * @throws IOException 1885 * if an I/O error has occurred 1886 */ 1887 public int read(final byte[] b) throws IOException { 1888 return read(b, 0, b.length); 1889 } 1890 1891 /** 1892 * Reads data into an array of bytes. 
1893 * 1894 * @param b the array to write data to 1895 * @param off offset into the buffer to start filling at 1896 * @param len of bytes to read 1897 * @return the number of bytes read, or -1 if end of input is reached 1898 * @throws IOException 1899 * if an I/O error has occurred 1900 */ 1901 public int read(final byte[] b, final int off, final int len) throws IOException { 1902 if (len == 0) { 1903 return 0; 1904 } 1905 final int cnt = getCurrentStream().read(b, off, len); 1906 if (cnt > 0) { 1907 uncompressedBytesReadFromCurrentEntry += cnt; 1908 } 1909 return cnt; 1910 } 1911 1912 /** 1913 * Provides statistics for bytes read from the current entry. 1914 * 1915 * @return statistics for bytes read from the current entry 1916 * @since 1.17 1917 */ 1918 public InputStreamStatistics getStatisticsForCurrentEntry() { 1919 return new InputStreamStatistics() { 1920 @Override 1921 public long getCompressedCount() { 1922 return compressedBytesReadFromCurrentEntry; 1923 } 1924 @Override 1925 public long getUncompressedCount() { 1926 return uncompressedBytesReadFromCurrentEntry; 1927 } 1928 }; 1929 } 1930 1931 private static long readUint64(final ByteBuffer in) throws IOException { 1932 // long rather than int as it might get shifted beyond the range of an int 1933 final long firstByte = getUnsignedByte(in); 1934 int mask = 0x80; 1935 long value = 0; 1936 for (int i = 0; i < 8; i++) { 1937 if ((firstByte & mask) == 0) { 1938 return value | ((firstByte & (mask - 1)) << (8 * i)); 1939 } 1940 final long nextByte = getUnsignedByte(in); 1941 value |= nextByte << (8 * i); 1942 mask >>>= 1; 1943 } 1944 return value; 1945 } 1946 1947 private static char getChar(final ByteBuffer buf) throws IOException { 1948 if (buf.remaining() < 2) { 1949 throw new EOFException(); 1950 } 1951 return buf.getChar(); 1952 } 1953 1954 private static int getInt(final ByteBuffer buf) throws IOException { 1955 if (buf.remaining() < 4) { 1956 throw new EOFException(); 1957 } 1958 return buf.getInt(); 
1959 } 1960 1961 private static long getLong(final ByteBuffer buf) throws IOException { 1962 if (buf.remaining() < 8) { 1963 throw new EOFException(); 1964 } 1965 return buf.getLong(); 1966 } 1967 1968 private static void get(final ByteBuffer buf, final byte[] to) throws IOException { 1969 if (buf.remaining() < to.length) { 1970 throw new EOFException(); 1971 } 1972 buf.get(to); 1973 } 1974 1975 private static int getUnsignedByte(final ByteBuffer buf) throws IOException { 1976 if (!buf.hasRemaining()) { 1977 throw new EOFException(); 1978 } 1979 return buf.get() & 0xff; 1980 } 1981 1982 /** 1983 * Checks if the signature matches what is expected for a 7z file. 1984 * 1985 * @param signature 1986 * the bytes to check 1987 * @param length 1988 * the number of bytes to check 1989 * @return true, if this is the signature of a 7z archive. 1990 * @since 1.8 1991 */ 1992 public static boolean matches(final byte[] signature, final int length) { 1993 if (length < sevenZSignature.length) { 1994 return false; 1995 } 1996 1997 for (int i = 0; i < sevenZSignature.length; i++) { 1998 if (signature[i] != sevenZSignature[i]) { 1999 return false; 2000 } 2001 } 2002 return true; 2003 } 2004 2005 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { 2006 if (bytesToSkip < 1) { 2007 return 0; 2008 } 2009 final int current = input.position(); 2010 final int maxSkip = input.remaining(); 2011 if (maxSkip < bytesToSkip) { 2012 bytesToSkip = maxSkip; 2013 } 2014 input.position(current + (int) bytesToSkip); 2015 return bytesToSkip; 2016 } 2017 2018 private void readFully(final ByteBuffer buf) throws IOException { 2019 buf.rewind(); 2020 IOUtils.readFully(channel, buf); 2021 buf.flip(); 2022 } 2023 2024 @Override 2025 public String toString() { 2026 return archive.toString(); 2027 } 2028 2029 /** 2030 * Derives a default file name from the archive name - if known. 2031 * 2032 * <p>This implements the same heuristics the 7z tools use. 
In 2033 * 7z's case if an archive contains entries without a name - 2034 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - 2035 * then its command line and GUI tools will use this default name 2036 * when extracting the entries.</p> 2037 * 2038 * @return null if the name of the archive is unknown. Otherwise 2039 * if the name of the archive has got any extension, it is 2040 * stripped and the remainder returned. Finally if the name of the 2041 * archive hasn't got any extension then a {@code ~} character is 2042 * appended to the archive name. 2043 * 2044 * @since 1.19 2045 */ 2046 public String getDefaultName() { 2047 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 2048 return null; 2049 } 2050 2051 final String lastSegment = new File(fileName).getName(); 2052 final int dotPos = lastSegment.lastIndexOf("."); 2053 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 2054 return lastSegment.substring(0, dotPos); 2055 } 2056 return lastSegment + "~"; 2057 } 2058 2059 private static final CharsetEncoder PASSWORD_ENCODER = StandardCharsets.UTF_16LE.newEncoder(); 2060 2061 private static byte[] utf16Decode(final char[] chars) throws IOException { 2062 if (chars == null) { 2063 return null; 2064 } 2065 final ByteBuffer encoded = PASSWORD_ENCODER.encode(CharBuffer.wrap(chars)); 2066 if (encoded.hasArray()) { 2067 return encoded.array(); 2068 } 2069 final byte[] e = new byte[encoded.remaining()]; 2070 encoded.get(e); 2071 return e; 2072 } 2073 2074 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 2075 if (value > Integer.MAX_VALUE || value < 0) { 2076 throw new IOException("Cannot handle " + what + " " + value); 2077 } 2078 return (int) value; 2079 } 2080 2081 private static class ArchiveStatistics { 2082 private int numberOfPackedStreams; 2083 private long numberOfCoders; 2084 private long numberOfOutStreams; 2085 private long numberOfInStreams; 2086 private 
long numberOfUnpackSubStreams; 2087 private int numberOfFolders; 2088 private BitSet folderHasCrc; 2089 private int numberOfEntries; 2090 private int numberOfEntriesWithStream; 2091 2092 @Override 2093 public String toString() { 2094 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders 2095 + " folders. Estimated size " + (estimateSize()/1024l) + " kB."; 2096 } 2097 2098 long estimateSize() { 2099 long lowerBound = 16l * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 2100 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 2101 + numberOfFolders * folderSize() /* folders in Archive */ 2102 + numberOfCoders * coderSize() /* coders in Folder */ 2103 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 2104 + 8l * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 2105 + 8l * numberOfOutStreams /* unpackSizes in Folder */ 2106 + numberOfEntries * entrySize() /* files in Archive */ 2107 + streamMapSize() 2108 ; 2109 return 2 * lowerBound /* conservative guess */; 2110 } 2111 2112 void assertValidity(int maxMemoryLimitInKb) throws IOException { 2113 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 2114 throw new IOException("archive with entries but no folders"); 2115 } 2116 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 2117 throw new IOException("archive doesn't contain enough substreams for entries"); 2118 } 2119 2120 final long memoryNeededInKb = estimateSize() / 1024; 2121 if (maxMemoryLimitInKb < memoryNeededInKb) { 2122 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 2123 } 2124 } 2125 2126 private long folderSize() { 2127 return 30; /* nested arrays are accounted for separately */ 2128 } 2129 2130 private long coderSize() { 2131 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 2132 + 16 2133 + 4 /* properties, guess */ 2134 ; 2135 } 2136 2137 private 
long bindPairSize() { 2138 return 16; 2139 } 2140 2141 private long entrySize() { 2142 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 2143 } 2144 2145 private long streamMapSize() { 2146 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 2147 + 8 * numberOfPackedStreams /* packStreamOffsets */ 2148 + 4 * numberOfEntries /* fileFolderIndex */ 2149 ; 2150 } 2151 } 2152}