001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.EOFException; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.SequenceInputStream; 028import java.nio.ByteBuffer; 029import java.nio.channels.FileChannel; 030import java.nio.channels.SeekableByteChannel; 031import java.nio.file.Files; 032import java.nio.file.StandardOpenOption; 033import java.util.Arrays; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.Enumeration; 037import java.util.EnumSet; 038import java.util.HashMap; 039import java.util.LinkedList; 040import java.util.List; 041import java.util.Map; 042import java.util.zip.Inflater; 043import java.util.zip.ZipException; 044 045import org.apache.commons.compress.archivers.EntryStreamOffsets; 046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 048import 
org.apache.commons.compress.utils.BoundedArchiveInputStream; 049import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 050import org.apache.commons.compress.utils.CountingInputStream; 051import org.apache.commons.compress.utils.IOUtils; 052import org.apache.commons.compress.utils.InputStreamStatistics; 053 054import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 055import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 056import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 057import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 058import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 059 060/** 061 * Replacement for <code>java.util.ZipFile</code>. 062 * 063 * <p>This class adds support for file name encodings other than UTF-8 064 * (which is required to work on ZIP files created by native zip tools 065 * and is able to skip a preamble like the one found in self 066 * extracting archives. Furthermore it returns instances of 067 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 068 * instead of <code>java.util.zip.ZipEntry</code>.</p> 069 * 070 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 071 * have to reimplement all methods anyway. Like 072 * <code>java.util.ZipFile</code>, it uses SeekableByteChannel under the 073 * covers and supports compressed and uncompressed entries. 
As of 074 * Apache Commons Compress 1.3 it also transparently supports Zip64 075 * extensions and thus individual entries and archives larger than 4 076 * GB or with more than 65536 entries.</p> 077 * 078 * <p>The method signatures mimic the ones of 079 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 080 * 081 * <ul> 082 * <li>There is no getName method.</li> 083 * <li>entries has been renamed to getEntries.</li> 084 * <li>getEntries and getEntry return 085 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 086 * instances.</li> 087 * <li>close is allowed to throw IOException.</li> 088 * </ul> 089 * 090 */ 091public class ZipFile implements Closeable { 092 private static final int HASH_SIZE = 509; 093 static final int NIBLET_MASK = 0x0f; 094 static final int BYTE_SHIFT = 8; 095 private static final int POS_0 = 0; 096 private static final int POS_1 = 1; 097 private static final int POS_2 = 2; 098 private static final int POS_3 = 3; 099 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 100 101 /** 102 * List of entries in the order they appear inside the central 103 * directory. 104 */ 105 private final List<ZipArchiveEntry> entries = 106 new LinkedList<>(); 107 108 /** 109 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 110 */ 111 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 112 new HashMap<>(HASH_SIZE); 113 114 /** 115 * The encoding to use for file names and the file comment. 116 * 117 * <p>For a list of possible values see <a 118 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 119 * Defaults to UTF-8.</p> 120 */ 121 private final String encoding; 122 123 /** 124 * The zip encoding to use for file names and the file comment. 125 */ 126 private final ZipEncoding zipEncoding; 127 128 /** 129 * File name of actual source. 
130 */ 131 private final String archiveName; 132 133 /** 134 * The actual data source. 135 */ 136 private final SeekableByteChannel archive; 137 138 /** 139 * Whether to look for and use Unicode extra fields. 140 */ 141 private final boolean useUnicodeExtraFields; 142 143 /** 144 * Whether the file is closed. 145 */ 146 private volatile boolean closed = true; 147 148 /** 149 * Whether the zip archive is a split zip archive 150 */ 151 private final boolean isSplitZipArchive; 152 153 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 154 private final byte[] dwordBuf = new byte[DWORD]; 155 private final byte[] wordBuf = new byte[WORD]; 156 private final byte[] cfhBuf = new byte[CFH_LEN]; 157 private final byte[] shortBuf = new byte[SHORT]; 158 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 159 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 160 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 161 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 162 163 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 164 private long centralDirectoryStartOffset; 165 166 /** 167 * Opens the given file for reading, assuming "UTF8" for file names. 168 * 169 * @param f the archive. 170 * 171 * @throws IOException if an error occurs while reading the file. 172 */ 173 public ZipFile(final File f) throws IOException { 174 this(f, ZipEncodingHelper.UTF8); 175 } 176 177 /** 178 * Opens the given file for reading, assuming "UTF8". 179 * 180 * @param name name of the archive. 181 * 182 * @throws IOException if an error occurs while reading the file. 183 */ 184 public ZipFile(final String name) throws IOException { 185 this(new File(name), ZipEncodingHelper.UTF8); 186 } 187 188 /** 189 * Opens the given file for reading, assuming the specified 190 * encoding for file names, scanning unicode extra fields. 191 * 192 * @param name name of the archive. 
193 * @param encoding the encoding to use for file names, use null 194 * for the platform's default encoding 195 * 196 * @throws IOException if an error occurs while reading the file. 197 */ 198 public ZipFile(final String name, final String encoding) throws IOException { 199 this(new File(name), encoding, true); 200 } 201 202 /** 203 * Opens the given file for reading, assuming the specified 204 * encoding for file names and scanning for unicode extra fields. 205 * 206 * @param f the archive. 207 * @param encoding the encoding to use for file names, use null 208 * for the platform's default encoding 209 * 210 * @throws IOException if an error occurs while reading the file. 211 */ 212 public ZipFile(final File f, final String encoding) throws IOException { 213 this(f, encoding, true); 214 } 215 216 /** 217 * Opens the given file for reading, assuming the specified 218 * encoding for file names. 219 * 220 * @param f the archive. 221 * @param encoding the encoding to use for file names, use null 222 * for the platform's default encoding 223 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 224 * Extra Fields (if present) to set the file names. 225 * 226 * @throws IOException if an error occurs while reading the file. 227 */ 228 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 229 throws IOException { 230 this(f, encoding, useUnicodeExtraFields, false); 231 } 232 233 /** 234 * Opens the given file for reading, assuming the specified 235 * encoding for file names. 236 * 237 * 238 * <p>By default the central directory record and all local file headers of the archive will be read immediately 239 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 240 * can be set to {@code true} which restricts parsing to the central directory. 
Unfortunately the local file header 241 * may contain information not present inside of the central directory which will not be available when the argument 242 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 243 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also 244 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 245 * true}.</p> 246 * 247 * @param f the archive. 248 * @param encoding the encoding to use for file names, use null 249 * for the platform's default encoding 250 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 251 * Extra Fields (if present) to set the file names. 252 * @param ignoreLocalFileHeader whether to ignore information 253 * stored inside the local file header (see the notes in this method's javadoc) 254 * 255 * @throws IOException if an error occurs while reading the file. 256 * @since 1.19 257 */ 258 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, 259 final boolean ignoreLocalFileHeader) 260 throws IOException { 261 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 262 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 263 } 264 265 /** 266 * Opens the given channel for reading, assuming "UTF8" for file names. 267 * 268 * <p>{@link 269 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 270 * allows you to read from an in-memory archive.</p> 271 * 272 * @param channel the archive. 273 * 274 * @throws IOException if an error occurs while reading the file. 275 * @since 1.13 276 */ 277 public ZipFile(final SeekableByteChannel channel) 278 throws IOException { 279 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 280 } 281 282 /** 283 * Opens the given channel for reading, assuming the specified 284 * encoding for file names. 
285 * 286 * <p>{@link 287 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 288 * allows you to read from an in-memory archive.</p> 289 * 290 * @param channel the archive. 291 * @param encoding the encoding to use for file names, use null 292 * for the platform's default encoding 293 * 294 * @throws IOException if an error occurs while reading the file. 295 * @since 1.13 296 */ 297 public ZipFile(final SeekableByteChannel channel, final String encoding) 298 throws IOException { 299 this(channel, "unknown archive", encoding, true); 300 } 301 302 /** 303 * Opens the given channel for reading, assuming the specified 304 * encoding for file names. 305 * 306 * <p>{@link 307 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 308 * allows you to read from an in-memory archive.</p> 309 * 310 * @param channel the archive. 311 * @param archiveName name of the archive, used for error messages only. 312 * @param encoding the encoding to use for file names, use null 313 * for the platform's default encoding 314 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 315 * Extra Fields (if present) to set the file names. 316 * 317 * @throws IOException if an error occurs while reading the file. 318 * @since 1.13 319 */ 320 public ZipFile(final SeekableByteChannel channel, final String archiveName, 321 final String encoding, final boolean useUnicodeExtraFields) 322 throws IOException { 323 this(channel, archiveName, encoding, useUnicodeExtraFields, false, false); 324 } 325 326 /** 327 * Opens the given channel for reading, assuming the specified 328 * encoding for file names. 329 * 330 * <p>{@link 331 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 332 * allows you to read from an in-memory archive.</p> 333 * 334 * <p>By default the central directory record and all local file headers of the archive will be read immediately 335 * which may take a considerable amount of time when the archive is big. 
The {@code ignoreLocalFileHeader} parameter 336 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 337 * may contain information not present inside of the central directory which will not be available when the argument 338 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 339 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also 340 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 341 * true}.</p> 342 * 343 * @param channel the archive. 344 * @param archiveName name of the archive, used for error messages only. 345 * @param encoding the encoding to use for file names, use null 346 * for the platform's default encoding 347 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 348 * Extra Fields (if present) to set the file names. 349 * @param ignoreLocalFileHeader whether to ignore information 350 * stored inside the local file header (see the notes in this method's javadoc) 351 * 352 * @throws IOException if an error occurs while reading the file. 
353 * @since 1.19 354 */ 355 public ZipFile(final SeekableByteChannel channel, final String archiveName, 356 final String encoding, final boolean useUnicodeExtraFields, 357 final boolean ignoreLocalFileHeader) 358 throws IOException { 359 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 360 } 361 362 private ZipFile(final SeekableByteChannel channel, final String archiveName, 363 final String encoding, final boolean useUnicodeExtraFields, 364 final boolean closeOnError, final boolean ignoreLocalFileHeader) 365 throws IOException { 366 isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); 367 368 this.archiveName = archiveName; 369 this.encoding = encoding; 370 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 371 this.useUnicodeExtraFields = useUnicodeExtraFields; 372 archive = channel; 373 boolean success = false; 374 try { 375 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 376 populateFromCentralDirectory(); 377 if (!ignoreLocalFileHeader) { 378 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 379 } 380 fillNameMap(); 381 success = true; 382 } catch (final IOException e) { 383 throw new IOException("Error on ZipFile " + archiveName, e); 384 } finally { 385 closed = !success; 386 if (!success && closeOnError) { 387 IOUtils.closeQuietly(archive); 388 } 389 } 390 } 391 392 /** 393 * The encoding to use for file names and the file comment. 394 * 395 * @return null if using the platform's default character encoding. 396 */ 397 public String getEncoding() { 398 return encoding; 399 } 400 401 /** 402 * Closes the archive. 403 * @throws IOException if an error occurs closing the archive. 404 */ 405 @Override 406 public void close() throws IOException { 407 // this flag is only written here and read in finalize() which 408 // can never be run in parallel. 409 // no synchronization needed. 
410 closed = true; 411 412 archive.close(); 413 } 414 415 /** 416 * close a zipfile quietly; throw no io fault, do nothing 417 * on a null parameter 418 * @param zipfile file to close, can be null 419 */ 420 public static void closeQuietly(final ZipFile zipfile) { 421 IOUtils.closeQuietly(zipfile); 422 } 423 424 /** 425 * Returns all entries. 426 * 427 * <p>Entries will be returned in the same order they appear 428 * within the archive's central directory.</p> 429 * 430 * @return all entries as {@link ZipArchiveEntry} instances 431 */ 432 public Enumeration<ZipArchiveEntry> getEntries() { 433 return Collections.enumeration(entries); 434 } 435 436 /** 437 * Returns all entries in physical order. 438 * 439 * <p>Entries will be returned in the same order their contents 440 * appear within the archive.</p> 441 * 442 * @return all entries as {@link ZipArchiveEntry} instances 443 * 444 * @since 1.1 445 */ 446 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 447 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY); 448 Arrays.sort(allEntries, offsetComparator); 449 return Collections.enumeration(Arrays.asList(allEntries)); 450 } 451 452 /** 453 * Returns a named entry - or {@code null} if no entry by 454 * that name exists. 455 * 456 * <p>If multiple entries with the same name exist the first entry 457 * in the archive's central directory by that name is 458 * returned.</p> 459 * 460 * @param name name of the entry. 461 * @return the ZipArchiveEntry corresponding to the given name - or 462 * {@code null} if not present. 463 */ 464 public ZipArchiveEntry getEntry(final String name) { 465 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 466 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 467 } 468 469 /** 470 * Returns all named entries in the same order they appear within 471 * the archive's central directory. 472 * 473 * @param name name of the entry. 
474 * @return the Iterable<ZipArchiveEntry> corresponding to the 475 * given name 476 * @since 1.6 477 */ 478 public Iterable<ZipArchiveEntry> getEntries(final String name) { 479 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 480 return entriesOfThatName != null ? entriesOfThatName 481 : Collections.emptyList(); 482 } 483 484 /** 485 * Returns all named entries in the same order their contents 486 * appear within the archive. 487 * 488 * @param name name of the entry. 489 * @return the Iterable<ZipArchiveEntry> corresponding to the 490 * given name 491 * @since 1.6 492 */ 493 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 494 ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY; 495 if (nameMap.containsKey(name)) { 496 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 497 Arrays.sort(entriesOfThatName, offsetComparator); 498 } 499 return Arrays.asList(entriesOfThatName); 500 } 501 502 /** 503 * Whether this class is able to read the given entry. 504 * 505 * <p>May return false if it is set up to use encryption or a 506 * compression method that hasn't been implemented yet.</p> 507 * @since 1.1 508 * @param ze the entry 509 * @return whether this class is able to read the given entry. 510 */ 511 public boolean canReadEntryData(final ZipArchiveEntry ze) { 512 return ZipUtil.canHandleEntryData(ze); 513 } 514 515 /** 516 * Expose the raw stream of the archive entry (compressed form). 517 * 518 * <p>This method does not relate to how/if we understand the payload in the 519 * stream, since we really only intend to move it on to somewhere else.</p> 520 * 521 * @param ze The entry to get the stream for 522 * @return The raw input stream containing (possibly) compressed data. 
523 * @since 1.11 524 */ 525 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 526 if (!(ze instanceof Entry)) { 527 return null; 528 } 529 final long start = ze.getDataOffset(); 530 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 531 return null; 532 } 533 return createBoundedInputStream(start, ze.getCompressedSize()); 534 } 535 536 537 /** 538 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 539 * Compression and all other attributes will be as in this file. 540 * <p>This method transfers entries based on the central directory of the zip file.</p> 541 * 542 * @param target The zipArchiveOutputStream to write the entries to 543 * @param predicate A predicate that selects which entries to write 544 * @throws IOException on error 545 */ 546 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 547 throws IOException { 548 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 549 while (src.hasMoreElements()) { 550 final ZipArchiveEntry entry = src.nextElement(); 551 if (predicate.test( entry)) { 552 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 553 } 554 } 555 } 556 557 /** 558 * Returns an InputStream for reading the contents of the given entry. 559 * 560 * @param ze the entry to get the stream for. 561 * @return a stream to read the entry from. The returned stream 562 * implements {@link InputStreamStatistics}. 
563 * @throws IOException if unable to create an input stream from the zipentry 564 */ 565 public InputStream getInputStream(final ZipArchiveEntry ze) 566 throws IOException { 567 if (!(ze instanceof Entry)) { 568 return null; 569 } 570 // cast validity is checked just above 571 ZipUtil.checkRequestedFeatures(ze); 572 final long start = getDataOffset(ze); 573 574 // doesn't get closed if the method is not supported - which 575 // should never happen because of the checkRequestedFeatures 576 // call above 577 final InputStream is = 578 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 579 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 580 case STORED: 581 return new StoredStatisticsStream(is); 582 case UNSHRINKING: 583 return new UnshrinkingInputStream(is); 584 case IMPLODING: 585 try { 586 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 587 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 588 } catch (final IllegalArgumentException ex) { 589 throw new IOException("bad IMPLODE data", ex); 590 } 591 case DEFLATED: 592 final Inflater inflater = new Inflater(true); 593 // Inflater with nowrap=true has this odd contract for a zero padding 594 // byte following the data stream; this used to be zlib's requirement 595 // and has been fixed a long time ago, but the contract persists so 596 // we comply. 
597 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 598 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 599 inflater) { 600 @Override 601 public void close() throws IOException { 602 try { 603 super.close(); 604 } finally { 605 inflater.end(); 606 } 607 } 608 }; 609 case BZIP2: 610 return new BZip2CompressorInputStream(is); 611 case ENHANCED_DEFLATED: 612 return new Deflate64CompressorInputStream(is); 613 case AES_ENCRYPTED: 614 case EXPANDING_LEVEL_1: 615 case EXPANDING_LEVEL_2: 616 case EXPANDING_LEVEL_3: 617 case EXPANDING_LEVEL_4: 618 case JPEG: 619 case LZMA: 620 case PKWARE_IMPLODING: 621 case PPMD: 622 case TOKENIZATION: 623 case UNKNOWN: 624 case WAVPACK: 625 case XZ: 626 default: 627 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze); 628 } 629 } 630 631 /** 632 * <p> 633 * Convenience method to return the entry's content as a String if isUnixSymlink() 634 * returns true for it, otherwise returns null. 635 * </p> 636 * 637 * <p>This method assumes the symbolic link's file name uses the 638 * same encoding that as been specified for this ZipFile.</p> 639 * 640 * @param entry ZipArchiveEntry object that represents the symbolic link 641 * @return entry's content as a String 642 * @throws IOException problem with content's input stream 643 * @since 1.5 644 */ 645 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 646 if (entry != null && entry.isUnixSymlink()) { 647 try (InputStream in = getInputStream(entry)) { 648 return zipEncoding.decode(IOUtils.toByteArray(in)); 649 } 650 } 651 return null; 652 } 653 654 /** 655 * Ensures that the close method of this zipfile is called when 656 * there are no more references to it. 
657 * @see #close() 658 */ 659 @Override 660 protected void finalize() throws Throwable { 661 try { 662 if (!closed) { 663 System.err.println("Cleaning up unclosed ZipFile for archive " 664 + archiveName); 665 close(); 666 } 667 } finally { 668 super.finalize(); 669 } 670 } 671 672 /** 673 * Length of a "central directory" entry structure without file 674 * name, extra fields or comment. 675 */ 676 private static final int CFH_LEN = 677 /* version made by */ SHORT 678 /* version needed to extract */ + SHORT 679 /* general purpose bit flag */ + SHORT 680 /* compression method */ + SHORT 681 /* last mod file time */ + SHORT 682 /* last mod file date */ + SHORT 683 /* crc-32 */ + WORD 684 /* compressed size */ + WORD 685 /* uncompressed size */ + WORD 686 /* file name length */ + SHORT 687 /* extra field length */ + SHORT 688 /* file comment length */ + SHORT 689 /* disk number start */ + SHORT 690 /* internal file attributes */ + SHORT 691 /* external file attributes */ + WORD 692 /* relative offset of local header */ + WORD; 693 694 private static final long CFH_SIG = 695 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 696 697 /** 698 * Reads the central directory of the given archive and populates 699 * the internal tables with ZipArchiveEntry instances. 700 * 701 * <p>The ZipArchiveEntrys will know all data that can be obtained from 702 * the central directory alone, but not the data that requires the 703 * local file header or additional data to be read.</p> 704 * 705 * @return a map of zipentries that didn't have the language 706 * encoding flag set when read. 
*/
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();

        positionAtCentralDirectory();
        centralDirectoryStartOffset = archive.position();

        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        long sig = ZipLong.getValue(wordBuf);

        if (sig != CFH_SIG) {
            // no central directory entry at all
            if (startsWithLocalFileHeader()) {
                throw new IOException("Central directory is empty, can't expand"
                                      + " corrupt archive.");
            }
            return noUTF8Flag;
        }

        // consume entries for as long as each one is followed by another entry's signature
        do {
            readCentralDirectoryEntry(noUTF8Flag);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            sig = ZipLong.getValue(wordBuf);
        } while (sig == CFH_SIG);
        return noUTF8Flag;
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the global maps.
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     * @throws IOException if the entry cannot be read completely or
     * holds invalid data.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        // fixed-size part of the header, decoded field by field below
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        final Entry ze = new Entry();
        int pos = 0;

        final int versionMadeBy = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        // version required
        ze.setVersionRequired(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        // general purpose bit flag, kept both parsed and raw
        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, pos);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding;
        if (hasUTF8Flag) {
            entryEncoding = ZipEncodingHelper.UTF8_ZIP_ENCODING;
            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        } else {
            entryEncoding = zipEncoding;
        }
        ze.setGeneralPurposeBit(gpFlag);
        ze.setRawFlag(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        //noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, pos));
        ze.setTime(time);
        pos += WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        final long compressedSize = ZipLong.getValue(cfhBuf, pos);
        if (compressedSize < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        ze.setCompressedSize(compressedSize);
        pos += WORD;

        final long uncompressedSize = ZipLong.getValue(cfhBuf, pos);
        if (uncompressedSize < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        ze.setSize(uncompressedSize);
        pos += WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        // variable-length part: file name, extra data and comment follow in this order
        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
        if (fileName.length < fileNameLen) {
            throw new EOFException();
        }
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, pos));
        // data offset will be filled later
        entries.add(ze);

        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            ze.setCentralDirectoryExtra(cdExtraData);
        } catch (final RuntimeException ex) {
            final ZipException invalidExtra = new ZipException("Invalid extra data in entry " + ze.getName());
            invalidExtra.initCause(ex);
            throw invalidExtra;
        }

        setSizesAndOffsetFromZip64Extra(ze);
        sanityCheckLFHOffset(ze);

        final byte[] comment = IOUtils.readRange(archive, commentLen);
        if (comment.length < commentLen) {
            throw new EOFException();
        }
        ze.setComment(entryEncoding.decode(comment));

        // remember the raw name and comment of entries without the UTF-8 flag
        if (useUnicodeExtraFields && !hasUTF8Flag) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }

        ze.setStreamContiguous(true);
    }

    /**
     * Verifies that the entry's disk number and local file header
     * offset are non-negative and that the local file header does not
     * start after the central directory.
     *
     * @param ze the entry to check.
     * @throws IOException if one of the checks fails.
     */
    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
        final long diskNumber = ze.getDiskNumberStart();
        if (diskNumber < 0) {
            throw new IOException("broken archive, entry with negative disk number");
        }
        final long lfhOffset = ze.getLocalHeaderOffset();
        if (lfhOffset < 0) {
            throw new IOException("broken archive, entry with negative local file header offset");
        }

        final boolean startsAfterCentralDirectory;
        if (isSplitZipArchive) {
            if (diskNumber > centralDirectoryStartDiskNumber) {
                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
            }
            // on the central directory's own disk the relative offsets are compared
            startsAfterCentralDirectory = diskNumber == centralDirectoryStartDiskNumber
                && lfhOffset > centralDirectoryStartRelativeOffset;
        } else {
            startsAfterCentralDirectory = lfhOffset > centralDirectoryStartOffset;
        }
        if (startsAfterCentralDirectory) {
            throw new IOException("local file header for " + ze.getName() + " starts after central directory");
        }
    }

    /**
     * If the entry holds a Zip64
extended information extra field, 886 * read sizes from there if the entry's sizes are set to 887 * 0xFFFFFFFFF, do the same for the offset of the local file 888 * header. 889 * 890 * <p>Ensures the Zip64 extra either knows both compressed and 891 * uncompressed size or neither of both as the internal logic in 892 * ExtraFieldUtils forces the field to create local header data 893 * even if they are never used - and here a field with only one 894 * size would be invalid.</p> 895 */ 896 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) 897 throws IOException { 898 final ZipExtraField extra = 899 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 900 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 901 throw new ZipException("archive contains unparseable zip64 extra field"); 902 } 903 final Zip64ExtendedInformationExtraField z64 = 904 (Zip64ExtendedInformationExtraField) extra; 905 if (z64 != null) { 906 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 907 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 908 final boolean hasRelativeHeaderOffset = 909 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 910 final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT; 911 z64.reparseCentralDirectoryData(hasUncompressedSize, 912 hasCompressedSize, 913 hasRelativeHeaderOffset, 914 hasDiskStart); 915 916 if (hasUncompressedSize) { 917 final long size = z64.getSize().getLongValue(); 918 if (size < 0) { 919 throw new IOException("broken archive, entry with negative size"); 920 } 921 ze.setSize(size); 922 } else if (hasCompressedSize) { 923 z64.setSize(new ZipEightByteInteger(ze.getSize())); 924 } 925 926 if (hasCompressedSize) { 927 final long size = z64.getCompressedSize().getLongValue(); 928 if (size < 0) { 929 throw new IOException("broken archive, entry with negative compressed size"); 930 } 931 ze.setCompressedSize(size); 932 } else if (hasUncompressedSize) { 933 
z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 934 } 935 936 if (hasRelativeHeaderOffset) { 937 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 938 } 939 940 if (hasDiskStart) { 941 ze.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 942 } 943 } 944 } 945 946 /** 947 * Length of the "End of central directory record" - which is 948 * supposed to be the last structure of the archive - without file 949 * comment. 950 */ 951 static final int MIN_EOCD_SIZE = 952 /* end of central dir signature */ WORD 953 /* number of this disk */ + SHORT 954 /* number of the disk with the */ 955 /* start of the central directory */ + SHORT 956 /* total number of entries in */ 957 /* the central dir on this disk */ + SHORT 958 /* total number of entries in */ 959 /* the central dir */ + SHORT 960 /* size of the central directory */ + WORD 961 /* offset of start of central */ 962 /* directory with respect to */ 963 /* the starting disk number */ + WORD 964 /* zipfile comment length */ + SHORT; 965 966 /** 967 * Maximum length of the "End of central directory record" with a 968 * file comment. 969 */ 970 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 971 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 972 973 /** 974 * Offset of the field that holds the location of the first 975 * central directory entry inside the "End of central directory 976 * record" relative to the start of the "End of central directory 977 * record". 
978 */ 979 private static final int CFD_LOCATOR_OFFSET = 980 /* end of central dir signature */ WORD 981 /* number of this disk */ + SHORT 982 /* number of the disk with the */ 983 /* start of the central directory */ + SHORT 984 /* total number of entries in */ 985 /* the central dir on this disk */ + SHORT 986 /* total number of entries in */ 987 /* the central dir */ + SHORT 988 /* size of the central directory */ + WORD; 989 990 /** 991 * Offset of the field that holds the disk number of the first 992 * central directory entry inside the "End of central directory 993 * record" relative to the start of the "End of central directory 994 * record". 995 */ 996 private static final int CFD_DISK_OFFSET = 997 /* end of central dir signature */ WORD 998 /* number of this disk */ + SHORT; 999 1000 /** 1001 * Offset of the field that holds the location of the first 1002 * central directory entry inside the "End of central directory 1003 * record" relative to the "number of the disk with the start 1004 * of the central directory". 1005 */ 1006 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 1007 /* total number of entries in */ 1008 /* the central dir on this disk */ + SHORT 1009 /* total number of entries in */ 1010 /* the central dir */ + SHORT 1011 /* size of the central directory */ + WORD; 1012 1013 /** 1014 * Length of the "Zip64 end of central directory locator" - which 1015 * should be right in front of the "end of central directory 1016 * record" if one is present at all. 
1017 */ 1018 private static final int ZIP64_EOCDL_LENGTH = 1019 /* zip64 end of central dir locator sig */ WORD 1020 /* number of the disk with the start */ 1021 /* start of the zip64 end of */ 1022 /* central directory */ + WORD 1023 /* relative offset of the zip64 */ 1024 /* end of central directory record */ + DWORD 1025 /* total number of disks */ + WORD; 1026 1027 /** 1028 * Offset of the field that holds the location of the "Zip64 end 1029 * of central directory record" inside the "Zip64 end of central 1030 * directory locator" relative to the start of the "Zip64 end of 1031 * central directory locator". 1032 */ 1033 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 1034 /* zip64 end of central dir locator sig */ WORD 1035 /* number of the disk with the start */ 1036 /* start of the zip64 end of */ 1037 /* central directory */ + WORD; 1038 1039 /** 1040 * Offset of the field that holds the location of the first 1041 * central directory entry inside the "Zip64 end of central 1042 * directory record" relative to the start of the "Zip64 end of 1043 * central directory record". 1044 */ 1045 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 1046 /* zip64 end of central dir */ 1047 /* signature */ WORD 1048 /* size of zip64 end of central */ 1049 /* directory record */ + DWORD 1050 /* version made by */ + SHORT 1051 /* version needed to extract */ + SHORT 1052 /* number of this disk */ + WORD 1053 /* number of the disk with the */ 1054 /* start of the central directory */ + WORD 1055 /* total number of entries in the */ 1056 /* central directory on this disk */ + DWORD 1057 /* total number of entries in the */ 1058 /* central directory */ + DWORD 1059 /* size of the central directory */ + DWORD; 1060 1061 /** 1062 * Offset of the field that holds the disk number of the first 1063 * central directory entry inside the "Zip64 end of central 1064 * directory record" relative to the start of the "Zip64 end of 1065 * central directory record". 
1066 */ 1067 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 1068 /* zip64 end of central dir */ 1069 /* signature */ WORD 1070 /* size of zip64 end of central */ 1071 /* directory record */ + DWORD 1072 /* version made by */ + SHORT 1073 /* version needed to extract */ + SHORT 1074 /* number of this disk */ + WORD; 1075 1076 /** 1077 * Offset of the field that holds the location of the first 1078 * central directory entry inside the "Zip64 end of central 1079 * directory record" relative to the "number of the disk 1080 * with the start of the central directory". 1081 */ 1082 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 1083 /* total number of entries in the */ 1084 /* central directory on this disk */ DWORD 1085 /* total number of entries in the */ 1086 /* central directory */ + DWORD 1087 /* size of the central directory */ + DWORD; 1088 1089 /** 1090 * Searches for either the "Zip64 end of central directory 1091 * locator" or the "End of central dir record", parses 1092 * it and positions the stream at the first central directory 1093 * record. 
1094 */ 1095 private void positionAtCentralDirectory() 1096 throws IOException { 1097 positionAtEndOfCentralDirectoryRecord(); 1098 boolean found = false; 1099 final boolean searchedForZip64EOCD = 1100 archive.position() > ZIP64_EOCDL_LENGTH; 1101 if (searchedForZip64EOCD) { 1102 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1103 wordBbuf.rewind(); 1104 IOUtils.readFully(archive, wordBbuf); 1105 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 1106 wordBuf); 1107 } 1108 if (!found) { 1109 // not a ZIP64 archive 1110 if (searchedForZip64EOCD) { 1111 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 1112 } 1113 positionAtCentralDirectory32(); 1114 } else { 1115 positionAtCentralDirectory64(); 1116 } 1117 } 1118 1119 /** 1120 * Parses the "Zip64 end of central directory locator", 1121 * finds the "Zip64 end of central directory record" using the 1122 * parsed information, parses that and positions the stream at the 1123 * first central directory record. 1124 * 1125 * Expects stream to be positioned right behind the "Zip64 1126 * end of central directory locator"'s signature. 
1127 */ 1128 private void positionAtCentralDirectory64() 1129 throws IOException { 1130 if (isSplitZipArchive) { 1131 wordBbuf.rewind(); 1132 IOUtils.readFully(archive, wordBbuf); 1133 final long diskNumberOfEOCD = ZipLong.getValue(wordBuf); 1134 1135 dwordBbuf.rewind(); 1136 IOUtils.readFully(archive, dwordBbuf); 1137 final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf); 1138 ((ZipSplitReadOnlySeekableByteChannel) archive) 1139 .position(diskNumberOfEOCD, relativeOffsetOfEOCD); 1140 } else { 1141 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 1142 - WORD /* signature has already been read */); 1143 dwordBbuf.rewind(); 1144 IOUtils.readFully(archive, dwordBbuf); 1145 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 1146 } 1147 1148 wordBbuf.rewind(); 1149 IOUtils.readFully(archive, wordBbuf); 1150 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 1151 throw new ZipException("Archive's ZIP64 end of central " 1152 + "directory locator is corrupt."); 1153 } 1154 1155 if (isSplitZipArchive) { 1156 skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET 1157 - WORD /* signature has already been read */); 1158 wordBbuf.rewind(); 1159 IOUtils.readFully(archive, wordBbuf); 1160 centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf); 1161 1162 skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET); 1163 1164 dwordBbuf.rewind(); 1165 IOUtils.readFully(archive, dwordBbuf); 1166 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1167 ((ZipSplitReadOnlySeekableByteChannel) archive) 1168 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1169 } else { 1170 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 1171 - WORD /* signature has already been read */); 1172 dwordBbuf.rewind(); 1173 IOUtils.readFully(archive, dwordBbuf); 1174 centralDirectoryStartDiskNumber = 0; 1175 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1176 
archive.position(centralDirectoryStartRelativeOffset); 1177 } 1178 } 1179 1180 /** 1181 * Parses the "End of central dir record" and positions 1182 * the stream at the first central directory record. 1183 * 1184 * Expects stream to be positioned at the beginning of the 1185 * "End of central dir record". 1186 */ 1187 private void positionAtCentralDirectory32() 1188 throws IOException { 1189 if (isSplitZipArchive) { 1190 skipBytes(CFD_DISK_OFFSET); 1191 shortBbuf.rewind(); 1192 IOUtils.readFully(archive, shortBbuf); 1193 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1194 1195 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1196 1197 wordBbuf.rewind(); 1198 IOUtils.readFully(archive, wordBbuf); 1199 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1200 ((ZipSplitReadOnlySeekableByteChannel) archive) 1201 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1202 } else { 1203 skipBytes(CFD_LOCATOR_OFFSET); 1204 wordBbuf.rewind(); 1205 IOUtils.readFully(archive, wordBbuf); 1206 centralDirectoryStartDiskNumber = 0; 1207 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1208 archive.position(centralDirectoryStartRelativeOffset); 1209 } 1210 } 1211 1212 /** 1213 * Searches for the and positions the stream at the start of the 1214 * "End of central dir record". 1215 */ 1216 private void positionAtEndOfCentralDirectoryRecord() 1217 throws IOException { 1218 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 1219 ZipArchiveOutputStream.EOCD_SIG); 1220 if (!found) { 1221 throw new ZipException("Archive is not a ZIP archive"); 1222 } 1223 } 1224 1225 /** 1226 * Searches the archive backwards from minDistance to maxDistance 1227 * for the given signature, positions the RandomaccessFile right 1228 * at the signature if it has been found. 
1229 */ 1230 private boolean tryToLocateSignature(final long minDistanceFromEnd, 1231 final long maxDistanceFromEnd, 1232 final byte[] sig) throws IOException { 1233 boolean found = false; 1234 long off = archive.size() - minDistanceFromEnd; 1235 final long stopSearching = 1236 Math.max(0L, archive.size() - maxDistanceFromEnd); 1237 if (off >= 0) { 1238 for (; off >= stopSearching; off--) { 1239 archive.position(off); 1240 try { 1241 wordBbuf.rewind(); 1242 IOUtils.readFully(archive, wordBbuf); 1243 wordBbuf.flip(); 1244 } catch (final EOFException ex) { // NOSONAR 1245 break; 1246 } 1247 int curr = wordBbuf.get(); 1248 if (curr == sig[POS_0]) { 1249 curr = wordBbuf.get(); 1250 if (curr == sig[POS_1]) { 1251 curr = wordBbuf.get(); 1252 if (curr == sig[POS_2]) { 1253 curr = wordBbuf.get(); 1254 if (curr == sig[POS_3]) { 1255 found = true; 1256 break; 1257 } 1258 } 1259 } 1260 } 1261 } 1262 } 1263 if (found) { 1264 archive.position(off); 1265 } 1266 return found; 1267 } 1268 1269 /** 1270 * Skips the given number of bytes or throws an EOFException if 1271 * skipping failed. 1272 */ 1273 private void skipBytes(final int count) throws IOException { 1274 final long currentPosition = archive.position(); 1275 final long newPosition = currentPosition + count; 1276 if (newPosition > archive.size()) { 1277 throw new EOFException(); 1278 } 1279 archive.position(newPosition); 1280 } 1281 1282 /** 1283 * Number of bytes in local file header up to the "length of 1284 * file name" entry. 
1285 */ 1286 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1287 /* local file header signature */ WORD 1288 /* version needed to extract */ + SHORT 1289 /* general purpose bit flag */ + SHORT 1290 /* compression method */ + SHORT 1291 /* last mod file time */ + SHORT 1292 /* last mod file date */ + SHORT 1293 /* crc-32 */ + WORD 1294 /* compressed size */ + WORD 1295 /* uncompressed size */ + (long) WORD; 1296 1297 /** 1298 * Walks through all recorded entries and adds the data available 1299 * from the local file header. 1300 * 1301 * <p>Also records the offsets for the data to read from the 1302 * entries.</p> 1303 */ 1304 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1305 entriesWithoutUTF8Flag) 1306 throws IOException { 1307 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1308 // entries is filled in populateFromCentralDirectory and 1309 // never modified 1310 final Entry ze = (Entry) zipArchiveEntry; 1311 final int[] lens = setDataOffset(ze); 1312 final int fileNameLen = lens[0]; 1313 final int extraFieldLen = lens[1]; 1314 skipBytes(fileNameLen); 1315 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1316 if (localExtraData.length < extraFieldLen) { 1317 throw new EOFException(); 1318 } 1319 try { 1320 ze.setExtra(localExtraData); 1321 } catch (RuntimeException ex) { 1322 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1323 z.initCause(ex); 1324 throw z; 1325 } 1326 1327 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1328 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1329 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1330 nc.comment); 1331 } 1332 } 1333 } 1334 1335 private void fillNameMap() { 1336 for (final ZipArchiveEntry ze : entries) { 1337 // entries is filled in populateFromCentralDirectory and 1338 // never modified 1339 final String name = ze.getName(); 1340 LinkedList<ZipArchiveEntry> entriesOfThatName = 
nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 1341 entriesOfThatName.addLast(ze); 1342 } 1343 } 1344 1345 private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException { 1346 long offset = ze.getLocalHeaderOffset(); 1347 if (isSplitZipArchive) { 1348 ((ZipSplitReadOnlySeekableByteChannel) archive) 1349 .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1350 // the offset should be updated to the global offset 1351 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1352 } else { 1353 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1354 } 1355 wordBbuf.rewind(); 1356 IOUtils.readFully(archive, wordBbuf); 1357 wordBbuf.flip(); 1358 wordBbuf.get(shortBuf); 1359 final int fileNameLen = ZipShort.getValue(shortBuf); 1360 wordBbuf.get(shortBuf); 1361 final int extraFieldLen = ZipShort.getValue(shortBuf); 1362 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1363 + SHORT + SHORT + fileNameLen + extraFieldLen); 1364 if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) { 1365 throw new IOException("data for " + ze.getName() + " overlaps with central directory."); 1366 } 1367 return new int[] { fileNameLen, extraFieldLen }; 1368 } 1369 1370 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 1371 final long s = ze.getDataOffset(); 1372 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 1373 setDataOffset(ze); 1374 return ze.getDataOffset(); 1375 } 1376 return s; 1377 } 1378 1379 /** 1380 * Checks whether the archive starts with a LFH. If it doesn't, 1381 * it may be an empty archive. 1382 */ 1383 private boolean startsWithLocalFileHeader() throws IOException { 1384 archive.position(0); 1385 wordBbuf.rewind(); 1386 IOUtils.readFully(archive, wordBbuf); 1387 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1388 } 1389 1390 /** 1391 * Creates new BoundedInputStream, according to implementation of 1392 * underlying archive channel. 
1393 */ 1394 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 1395 if (start < 0 || remaining < 0 || start + remaining < start) { 1396 throw new IllegalArgumentException("Corrupted archive, stream boundaries" 1397 + " are out of range"); 1398 } 1399 return archive instanceof FileChannel ? 1400 new BoundedFileChannelInputStream(start, remaining) : 1401 new BoundedSeekableByteChannelInputStream(start, remaining, archive); 1402 } 1403 1404 /** 1405 * Lock-free implementation of BoundedInputStream. The 1406 * implementation uses positioned reads on the underlying archive 1407 * file channel and therefore performs significantly faster in 1408 * concurrent environment. 1409 */ 1410 private class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 1411 private final FileChannel archive; 1412 1413 BoundedFileChannelInputStream(final long start, final long remaining) { 1414 super(start, remaining); 1415 archive = (FileChannel) ZipFile.this.archive; 1416 } 1417 1418 @Override 1419 protected int read(final long pos, final ByteBuffer buf) throws IOException { 1420 final int read = archive.read(buf, pos); 1421 buf.flip(); 1422 return read; 1423 } 1424 } 1425 1426 private static final class NameAndComment { 1427 private final byte[] name; 1428 private final byte[] comment; 1429 private NameAndComment(final byte[] name, final byte[] comment) { 1430 this.name = name; 1431 this.comment = comment; 1432 } 1433 } 1434 1435 /** 1436 * Compares two ZipArchiveEntries based on their offset within the archive. 
*
     * <p>Entries are ordered by the number of the disk they start on
     * and then by the offset of their local file header.</p>
     *
     * <p>Won't return any meaningful results if one of the entries
     * isn't part of the archive at all.</p>
     *
     * @since 1.1
     */
    private final Comparator<ZipArchiveEntry> offsetComparator =
        Comparator.<ZipArchiveEntry>comparingLong(e -> e.getDiskNumberStart())
            .thenComparingLong(e -> e.getLocalHeaderOffset());

    /**
     * Extends ZipArchiveEntry to store the offset within the archive.
     */
    private static class Entry extends ZipArchiveEntry {

        Entry() {
        }

        /**
         * Combines the hash code of the superclass with both halves
         * of the local file header offset.
         */
        @Override
        public int hashCode() {
            final long headerOffset = getLocalHeaderOffset();
            final int lowBits = (int) headerOffset;
            final int highBits = (int) (headerOffset >> 32);
            return 3 * super.hashCode() + lowBits + highBits;
        }

        /**
         * Two entries are equal if the superclass considers them
         * equal and they agree on local file header offset, data
         * offset and disk number.
         */
        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            if (getLocalHeaderOffset() != that.getLocalHeaderOffset()) {
                return false;
            }
            if (super.getDataOffset() != that.getDataOffset()) {
                return false;
            }
            return super.getDiskNumberStart() == that.getDiskNumberStart();
        }
    }

    /**
     * Counting stream that reports the same byte count as compressed
     * and as uncompressed count.
     */
    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
        StoredStatisticsStream(final InputStream in) {
            super(in);
        }

        @Override
        public long getCompressedCount() {
            return super.getBytesRead();
        }

        @Override
        public long getUncompressedCount() {
            // delegates, so both counts are always identical
            return getCompressedCount();
        }
    }
}