001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.archivers.zip;
018
019import java.io.BufferedInputStream;
020import java.io.ByteArrayInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.SequenceInputStream;
027import java.nio.ByteBuffer;
028import java.nio.channels.FileChannel;
029import java.nio.channels.SeekableByteChannel;
030import java.nio.file.Files;
031import java.nio.file.Path;
032import java.nio.file.StandardOpenOption;
033import java.util.Arrays;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.EnumSet;
037import java.util.Enumeration;
038import java.util.HashMap;
039import java.util.LinkedList;
040import java.util.List;
041import java.util.Map;
042import java.util.zip.Inflater;
043import java.util.zip.ZipException;
044
045import org.apache.commons.compress.archivers.EntryStreamOffsets;
046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
048import org.apache.commons.compress.utils.BoundedArchiveInputStream;
049import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
050import org.apache.commons.compress.utils.CountingInputStream;
051import org.apache.commons.compress.utils.IOUtils;
052import org.apache.commons.compress.utils.InputStreamStatistics;
053
054/**
 * Replacement for {@code java.util.zip.ZipFile}.
056 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native ZIP tools)
 * and is able to skip a preamble like the one found in self-extracting
 * archives.  Furthermore it returns instances of
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
 * instead of {@code java.util.zip.ZipEntry}.</p>
063 *
 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would
 * have to reimplement all methods anyway.  It uses a
 * {@code SeekableByteChannel} under the covers and, like
 * {@code java.util.zip.ZipFile}, supports compressed and uncompressed
 * entries.  As of Apache Commons Compress 1.3 it also transparently
 * supports Zip64 extensions and thus individual entries and archives
 * larger than 4 GB or with more than 65536 entries.</p>
071 *
072 * <p>The method signatures mimic the ones of
073 * {@code java.util.zip.ZipFile}, with a couple of exceptions:
074 *
075 * <ul>
076 *   <li>There is no getName method.</li>
077 *   <li>entries has been renamed to getEntries.</li>
078 *   <li>getEntries and getEntry return
079 *   {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
080 *   instances.</li>
081 *   <li>close is allowed to throw IOException.</li>
082 * </ul>
083 */
084public class ZipFile implements Closeable {
085    /**
086     * Lock-free implementation of BoundedInputStream. The
087     * implementation uses positioned reads on the underlying archive
088     * file channel and therefore performs significantly faster in
089     * concurrent environment.
090     */
091    private class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
092        private final FileChannel archive;
093
094        BoundedFileChannelInputStream(final long start, final long remaining) {
095            super(start, remaining);
096            archive = (FileChannel) ZipFile.this.archive;
097        }
098
099        @Override
100        protected int read(final long pos, final ByteBuffer buf) throws IOException {
101            final int read = archive.read(buf, pos);
102            buf.flip();
103            return read;
104        }
105    }
106    /**
107     * Extends ZipArchiveEntry to store the offset within the archive.
108     */
109    private static class Entry extends ZipArchiveEntry {
110
111        Entry() {
112        }
113
114        @Override
115        public boolean equals(final Object other) {
116            if (super.equals(other)) {
117                // super.equals would return false if other were not an Entry
118                final Entry otherEntry = (Entry) other;
119                return getLocalHeaderOffset()
120                        == otherEntry.getLocalHeaderOffset()
121                    && super.getDataOffset()
122                        == otherEntry.getDataOffset()
123                    && super.getDiskNumberStart()
124                        == otherEntry.getDiskNumberStart();
125            }
126            return false;
127        }
128
129        @Override
130        public int hashCode() {
131            return 3 * super.hashCode()
132                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
133        }
134    }
135    private static final class NameAndComment {
136        private final byte[] name;
137        private final byte[] comment;
138        private NameAndComment(final byte[] name, final byte[] comment) {
139            this.name = name;
140            this.comment = comment;
141        }
142    }
143    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
144        StoredStatisticsStream(final InputStream in) {
145            super(in);
146        }
147
148        @Override
149        public long getCompressedCount() {
150            return super.getBytesRead();
151        }
152
153        @Override
154        public long getUncompressedCount() {
155            return getCompressedCount();
156        }
157    }
158    private static final int HASH_SIZE = 509;
159    static final int NIBLET_MASK = 0x0f;
160    static final int BYTE_SHIFT = 8;
161    private static final int POS_0 = 0;
162
163    private static final int POS_1 = 1;
164
165    private static final int POS_2 = 2;
166
167    private static final int POS_3 = 3;
168
169    private static final byte[] ONE_ZERO_BYTE = new byte[1];
170
171    /**
172     * Length of a "central directory" entry structure without file
173     * name, extra fields or comment.
174     */
175    private static final int CFH_LEN =
176        /* version made by                 */ ZipConstants.SHORT
177        /* version needed to extract       */ + ZipConstants.SHORT
178        /* general purpose bit flag        */ + ZipConstants.SHORT
179        /* compression method              */ + ZipConstants.SHORT
180        /* last mod file time              */ + ZipConstants.SHORT
181        /* last mod file date              */ + ZipConstants.SHORT
182        /* crc-32                          */ + ZipConstants.WORD
183        /* compressed size                 */ + ZipConstants.WORD
184        /* uncompressed size               */ + ZipConstants.WORD
185        /* file name length                */ + ZipConstants. SHORT
186        /* extra field length              */ + ZipConstants.SHORT
187        /* file comment length             */ + ZipConstants.SHORT
188        /* disk number start               */ + ZipConstants.SHORT
189        /* internal file attributes        */ + ZipConstants.SHORT
190        /* external file attributes        */ + ZipConstants.WORD
191        /* relative offset of local header */ + ZipConstants.WORD;
192
193    private static final long CFH_SIG =
194        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
195
196    /**
197     * Length of the "End of central directory record" - which is
198     * supposed to be the last structure of the archive - without file
199     * comment.
200     */
201    static final int MIN_EOCD_SIZE =
202        /* end of central dir signature    */ ZipConstants.WORD
203        /* number of this disk             */ + ZipConstants.SHORT
204        /* number of the disk with the     */
205        /* start of the central directory  */ + ZipConstants.SHORT
206        /* total number of entries in      */
207        /* the central dir on this disk    */ + ZipConstants.SHORT
208        /* total number of entries in      */
209        /* the central dir                 */ + ZipConstants.SHORT
210        /* size of the central directory   */ + ZipConstants.WORD
211        /* offset of start of central      */
212        /* directory with respect to       */
213        /* the starting disk number        */ + ZipConstants.WORD
214        /* ZIP file comment length         */ + ZipConstants.SHORT;
215
216    /**
217     * Maximum length of the "End of central directory record" with a
218     * file comment.
219     */
220    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
221        /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
222
223    /**
224     * Offset of the field that holds the location of the length of
225     * the central directory inside the "End of central directory
226     * record" relative to the start of the "End of central directory
227     * record".
228     */
229    private static final int CFD_LENGTH_OFFSET =
230        /* end of central dir signature    */ ZipConstants.WORD
231        /* number of this disk             */ + ZipConstants.SHORT
232        /* number of the disk with the     */
233        /* start of the central directory  */ + ZipConstants.SHORT
234        /* total number of entries in      */
235        /* the central dir on this disk    */ + ZipConstants.SHORT
236        /* total number of entries in      */
237        /* the central dir                 */ + ZipConstants.SHORT;
238
239    /**
240     * Offset of the field that holds the disk number of the first
241     * central directory entry inside the "End of central directory
242     * record" relative to the start of the "End of central directory
243     * record".
244     */
245    private static final int CFD_DISK_OFFSET =
246            /* end of central dir signature    */ ZipConstants.WORD
247            /* number of this disk             */ + ZipConstants.SHORT;
248    /**
249     * Offset of the field that holds the location of the first
250     * central directory entry inside the "End of central directory
251     * record" relative to the "number of the disk with the start
252     * of the central directory".
253     */
254    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
255            /* total number of entries in      */
256            /* the central dir on this disk    */ + ZipConstants.SHORT
257            /* total number of entries in      */
258            /* the central dir                 */ + ZipConstants.SHORT
259            /* size of the central directory   */ + ZipConstants.WORD;
260    /**
261     * Length of the "Zip64 end of central directory locator" - which
262     * should be right in front of the "end of central directory
263     * record" if one is present at all.
264     */
265    private static final int ZIP64_EOCDL_LENGTH =
266        /* zip64 end of central dir locator sig */ ZipConstants.WORD
267        /* number of the disk with the start    */
268        /* start of the zip64 end of            */
269        /* central directory                    */ + ZipConstants.WORD
270        /* relative offset of the zip64         */
271        /* end of central directory record      */ + ZipConstants.DWORD
272        /* total number of disks                */ + ZipConstants.WORD;
273    /**
274     * Offset of the field that holds the location of the "Zip64 end
275     * of central directory record" inside the "Zip64 end of central
276     * directory locator" relative to the start of the "Zip64 end of
277     * central directory locator".
278     */
279    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
280        /* zip64 end of central dir locator sig */ ZipConstants.WORD
281        /* number of the disk with the start    */
282        /* start of the zip64 end of            */
283        /* central directory                    */ + ZipConstants.WORD;
284    /**
285     * Offset of the field that holds the location of the first
286     * central directory entry inside the "Zip64 end of central
287     * directory record" relative to the start of the "Zip64 end of
288     * central directory record".
289     */
290    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
291        /* zip64 end of central dir        */
292        /* signature                       */ ZipConstants.WORD
293        /* size of zip64 end of central    */
294        /* directory record                */ + ZipConstants.DWORD
295        /* version made by                 */ + ZipConstants.SHORT
296        /* version needed to extract       */ + ZipConstants.SHORT
297        /* number of this disk             */ + ZipConstants.WORD
298        /* number of the disk with the     */
299        /* start of the central directory  */ + ZipConstants.WORD
300        /* total number of entries in the  */
301        /* central directory on this disk  */ + ZipConstants.DWORD
302        /* total number of entries in the  */
303        /* central directory               */ + ZipConstants.DWORD
304        /* size of the central directory   */ + ZipConstants.DWORD;
305    /**
306     * Offset of the field that holds the disk number of the first
307     * central directory entry inside the "Zip64 end of central
308     * directory record" relative to the start of the "Zip64 end of
309     * central directory record".
310     */
311    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
312            /* zip64 end of central dir        */
313            /* signature                       */ ZipConstants.WORD
314            /* size of zip64 end of central    */
315            /* directory record                */ + ZipConstants.DWORD
316            /* version made by                 */ + ZipConstants.SHORT
317            /* version needed to extract       */ + ZipConstants.SHORT
318            /* number of this disk             */ + ZipConstants.WORD;
319    /**
320     * Offset of the field that holds the location of the first
321     * central directory entry inside the "Zip64 end of central
322     * directory record" relative to the "number of the disk
323     * with the start of the central directory".
324     */
325    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
326            /* total number of entries in the  */
327            /* central directory on this disk  */ ZipConstants.DWORD
328            /* total number of entries in the  */
329            /* central directory               */ + ZipConstants.DWORD
330            /* size of the central directory   */ + ZipConstants.DWORD;
331    /**
332     * Number of bytes in local file header up to the &quot;length of
333     * file name&quot; entry.
334     */
335    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
336        /* local file header signature     */ ZipConstants.WORD
337        /* version needed to extract       */ + ZipConstants.SHORT
338        /* general purpose bit flag        */ + ZipConstants.SHORT
339        /* compression method              */ + ZipConstants.SHORT
340        /* last mod file time              */ + ZipConstants.SHORT
341        /* last mod file date              */ + ZipConstants.SHORT
342        /* crc-32                          */ + ZipConstants.WORD
343        /* compressed size                 */ + ZipConstants.WORD
344        /* uncompressed size               */ + (long) ZipConstants.WORD;
345
346    /**
347     * Compares two ZipArchiveEntries based on their offset within the archive.
348     *
349     * <p>Won't return any meaningful results if one of the entries
350     * isn't part of the archive at all.</p>
351     *
352     * @since 1.1
353     */
354    private static final Comparator<ZipArchiveEntry> offsetComparator =
355        Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
356            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
357
358    /**
359     * Closes a ZIP file quietly; throwing no IOException, does nothing
360     * on null input.
361     * @param zipFile file to close, can be null
362     */
363    public static void closeQuietly(final ZipFile zipFile) {
364        IOUtils.closeQuietly(zipFile);
365    }
366
367    /**
368     * List of entries in the order they appear inside the central
369     * directory.
370     */
371    private final List<ZipArchiveEntry> entries = new LinkedList<>();
372
373    /**
374     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
375     */
376    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);
377
378    /**
379     * The encoding to use for file names and the file comment.
380     *
381     * <p>For a list of possible values see <a
382     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
383     * Defaults to UTF-8.</p>
384     */
385    private final String encoding;
386
387    /**
388     * The ZIP encoding to use for file names and the file comment.
389     */
390    private final ZipEncoding zipEncoding;
391
392    /**
393     * File name of actual source.
394     */
395    private final String archiveName;
396
397    /**
398     * The actual data source.
399     */
400    private final SeekableByteChannel archive;
401
402    /**
403     * Whether to look for and use Unicode extra fields.
404     */
405    private final boolean useUnicodeExtraFields;
406
407    /**
408     * Whether the file is closed.
409     */
410    private volatile boolean closed = true;
411
412    /**
413     * Whether the ZIP archive is a split ZIP archive
414     */
415    private final boolean isSplitZipArchive;
416
417    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
418    private final byte[] dwordBuf = new byte[ZipConstants.DWORD];
419
420    private final byte[] wordBuf = new byte[ZipConstants.WORD];
421
422    private final byte[] cfhBuf = new byte[CFH_LEN];
423
424    private final byte[] shortBuf = new byte[ZipConstants.SHORT];
425
426    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
427
428    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
429
430    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
431
432    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
433
434    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
435
436    private long centralDirectoryStartOffset;
437
438    private long firstLocalFileHeaderOffset;
439
440    /**
441     * Opens the given file for reading, assuming "UTF8" for file names.
442     *
443     * @param f the archive.
444     *
445     * @throws IOException if an error occurs while reading the file.
446     */
447    public ZipFile(final File f) throws IOException {
448        this(f, ZipEncodingHelper.UTF8);
449    }
450
451    /**
452     * Opens the given file for reading, assuming the specified
453     * encoding for file names and scanning for Unicode extra fields.
454     *
455     * @param f the archive.
456     * @param encoding the encoding to use for file names, use null
457     * for the platform's default encoding
458     *
459     * @throws IOException if an error occurs while reading the file.
460     */
461    public ZipFile(final File f, final String encoding) throws IOException {
462        this(f.toPath(), encoding, true);
463    }
464
465    /**
466     * Opens the given file for reading, assuming the specified
467     * encoding for file names.
468     *
469     * @param f the archive.
470     * @param encoding the encoding to use for file names, use null
471     * for the platform's default encoding
472     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
473     * Extra Fields (if present) to set the file names.
474     *
475     * @throws IOException if an error occurs while reading the file.
476     */
477    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
478        throws IOException {
479        this(f.toPath(), encoding, useUnicodeExtraFields, false);
480    }
481
482    /**
483     * Opens the given file for reading, assuming the specified
484     * encoding for file names.
485     *
486     * <p>By default the central directory record and all local file headers of the archive will be read immediately
487     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
488     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
489     * may contain information not present inside of the central directory which will not be available when the argument
490     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
491     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p>
492     *
493     * @param f the archive.
494     * @param encoding the encoding to use for file names, use null
495     * for the platform's default encoding
496     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
497     * Extra Fields (if present) to set the file names.
498     * @param ignoreLocalFileHeader whether to ignore information
499     * stored inside the local file header (see the notes in this method's javadoc)
500     *
501     * @throws IOException if an error occurs while reading the file.
502     * @since 1.19
503     */
504    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields,
505                   final boolean ignoreLocalFileHeader)
506        throws IOException {
507        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
508             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
509    }
510
511    /**
512     * Opens the given path for reading, assuming "UTF8" for file names.
513     *
514     * @param path path to the archive.
515     * @throws IOException if an error occurs while reading the file.
516     * @since 1.22
517     */
518    public ZipFile(final Path path) throws IOException {
519        this(path, ZipEncodingHelper.UTF8);
520    }
521
522    /**
523     * Opens the given path for reading, assuming the specified
524     * encoding for file names and scanning for Unicode extra fields.
525     *
526     * @param path path to the archive.
527     * @param encoding the encoding to use for file names, use null
528     * for the platform's default encoding
529     * @throws IOException if an error occurs while reading the file.
530     * @since 1.22
531     */
532    public ZipFile(final Path path, final String encoding) throws IOException {
533        this(path, encoding, true);
534    }
535
536
537    /**
538     * Opens the given path for reading, assuming the specified
539     * encoding for file names.
540     *
541     * @param path path to the archive.
542     * @param encoding the encoding to use for file names, use null
543     * for the platform's default encoding
544     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
545     * Extra Fields (if present) to set the file names.
546     * @throws IOException if an error occurs while reading the file.
547     * @since 1.22
548     */
549    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields)
550            throws IOException {
551        this(path, encoding, useUnicodeExtraFields, false);
552    }
553
554    /**
555     * Opens the given path for reading, assuming the specified
556     * encoding for file names.
557     * <p>By default the central directory record and all local file headers of the archive will be read immediately
558     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
559     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
560     * may contain information not present inside of the central directory which will not be available when the argument
561     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
562     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p>
563     *
564     * @param path path to the archive.
565     * @param encoding the encoding to use for file names, use null
566     * for the platform's default encoding
567     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
568     * Extra Fields (if present) to set the file names.
569     * @param ignoreLocalFileHeader whether to ignore information
570     * stored inside the local file header (see the notes in this method's javadoc)
571     * @throws IOException if an error occurs while reading the file.
572     * @since 1.22
573     */
574    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields,
575                   final boolean ignoreLocalFileHeader)
576            throws IOException {
577        this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)),
578                path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields,
579                true, ignoreLocalFileHeader);
580    }
581
582    /**
583     * Opens the given channel for reading, assuming "UTF8" for file names.
584     *
585     * <p>{@link
586     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
587     * allows you to read from an in-memory archive.</p>
588     *
589     * @param channel the archive.
590     *
591     * @throws IOException if an error occurs while reading the file.
592     * @since 1.13
593     */
594    public ZipFile(final SeekableByteChannel channel)
595            throws IOException {
596        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
597    }
598
599    /**
600     * Opens the given channel for reading, assuming the specified
601     * encoding for file names.
602     *
603     * <p>{@link
604     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
605     * allows you to read from an in-memory archive.</p>
606     *
607     * @param channel the archive.
608     * @param encoding the encoding to use for file names, use null
609     * for the platform's default encoding
610     *
611     * @throws IOException if an error occurs while reading the file.
612     * @since 1.13
613     */
614    public ZipFile(final SeekableByteChannel channel, final String encoding)
615        throws IOException {
616        this(channel, "unknown archive", encoding, true);
617    }
618
619    /**
620     * Opens the given channel for reading, assuming the specified
621     * encoding for file names.
622     *
623     * <p>{@link
624     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
625     * allows you to read from an in-memory archive.</p>
626     *
627     * @param channel the archive.
628     * @param archiveName name of the archive, used for error messages only.
629     * @param encoding the encoding to use for file names, use null
630     * for the platform's default encoding
631     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
632     * Extra Fields (if present) to set the file names.
633     *
634     * @throws IOException if an error occurs while reading the file.
635     * @since 1.13
636     */
637    public ZipFile(final SeekableByteChannel channel, final String archiveName,
638                   final String encoding, final boolean useUnicodeExtraFields)
639        throws IOException {
640        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
641    }
642
643    /**
644     * Opens the given channel for reading, assuming the specified
645     * encoding for file names.
646     *
647     * <p>{@link
648     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
649     * allows you to read from an in-memory archive.</p>
650     *
651     * <p>By default the central directory record and all local file headers of the archive will be read immediately
652     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
653     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
654     * may contain information not present inside of the central directory which will not be available when the argument
655     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
656     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p>
657     *
658     * @param channel the archive.
659     * @param archiveName name of the archive, used for error messages only.
660     * @param encoding the encoding to use for file names, use null
661     * for the platform's default encoding
662     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
663     * Extra Fields (if present) to set the file names.
664     * @param ignoreLocalFileHeader whether to ignore information
665     * stored inside the local file header (see the notes in this method's javadoc)
666     *
667     * @throws IOException if an error occurs while reading the file.
668     * @since 1.19
669     */
670    public ZipFile(final SeekableByteChannel channel, final String archiveName,
671                   final String encoding, final boolean useUnicodeExtraFields,
672                   final boolean ignoreLocalFileHeader)
673        throws IOException {
674        this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
675    }
676
677    private ZipFile(final SeekableByteChannel channel, final String archiveName,
678                    final String encoding, final boolean useUnicodeExtraFields,
679                    final boolean closeOnError, final boolean ignoreLocalFileHeader)
680        throws IOException {
681        isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel);
682
683        this.archiveName = archiveName;
684        this.encoding = encoding;
685        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
686        this.useUnicodeExtraFields = useUnicodeExtraFields;
687        archive = channel;
688        boolean success = false;
689        try {
690            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
691                populateFromCentralDirectory();
692            if (!ignoreLocalFileHeader) {
693                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
694            }
695            fillNameMap();
696            success = true;
697        } catch (final IOException e) {
698            throw new IOException("Error on ZipFile " + archiveName, e);
699        } finally {
700            closed = !success;
701            if (!success && closeOnError) {
702                IOUtils.closeQuietly(archive);
703            }
704        }
705    }
706
707    /**
708     * Opens the given file for reading, assuming "UTF8".
709     *
710     * @param name name of the archive.
711     *
712     * @throws IOException if an error occurs while reading the file.
713     */
714    public ZipFile(final String name) throws IOException {
715        this(new File(name).toPath(), ZipEncodingHelper.UTF8);
716    }
717
718    /**
719     * Opens the given file for reading, assuming the specified
720     * encoding for file names, scanning unicode extra fields.
721     *
722     * @param name name of the archive.
723     * @param encoding the encoding to use for file names, use null
724     * for the platform's default encoding
725     *
726     * @throws IOException if an error occurs while reading the file.
727     */
728    public ZipFile(final String name, final String encoding) throws IOException {
729        this(new File(name).toPath(), encoding, true);
730    }
731
732    /**
733     * Whether this class is able to read the given entry.
734     *
735     * <p>May return false if it is set up to use encryption or a
736     * compression method that hasn't been implemented yet.</p>
737     * @since 1.1
738     * @param ze the entry
739     * @return whether this class is able to read the given entry.
740     */
741    public boolean canReadEntryData(final ZipArchiveEntry ze) {
742        return ZipUtil.canHandleEntryData(ze);
743    }
744
745    /**
746     * Closes the archive.
747     * @throws IOException if an error occurs closing the archive.
748     */
749    @Override
750    public void close() throws IOException {
751        // this flag is only written here and read in finalize() which
752        // can never be run in parallel.
753        // no synchronization needed.
754        closed = true;
755
756        archive.close();
757    }
758
759    /**
760     * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream.
761     * Compression and all other attributes will be as in this file.
762     * <p>This method transfers entries based on the central directory of the ZIP file.</p>
763     *
764     * @param target The zipArchiveOutputStream to write the entries to
765     * @param predicate A predicate that selects which entries to write
766     * @throws IOException on error
767     */
768    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
769            throws IOException {
770        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
771        while (src.hasMoreElements()) {
772            final ZipArchiveEntry entry = src.nextElement();
773            if (predicate.test( entry)) {
774                target.addRawArchiveEntry(entry, getRawInputStream(entry));
775            }
776        }
777    }
778
779    /**
780     * Creates new BoundedInputStream, according to implementation of
781     * underlying archive channel.
782     */
783    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
784        if (start < 0 || remaining < 0 || start + remaining < start) {
785            throw new IllegalArgumentException("Corrupted archive, stream boundaries"
786                + " are out of range");
787        }
788        return archive instanceof FileChannel ?
789            new BoundedFileChannelInputStream(start, remaining) :
790            new BoundedSeekableByteChannelInputStream(start, remaining, archive);
791    }
792
793    private void fillNameMap() {
794        entries.forEach(ze -> {
795            // entries are filled in populateFromCentralDirectory and
796            // never modified
797            final String name = ze.getName();
798            final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
799            entriesOfThatName.addLast(ze);
800        });
801    }
802
803    /**
804     * Ensures that the close method of this ZIP file is called when
805     * there are no more references to it.
806     * @see #close()
807     */
808    @Override
809    protected void finalize() throws Throwable {
810        try {
811            if (!closed) {
812                close();
813            }
814        } finally {
815            super.finalize();
816        }
817    }
818
819    /**
820     * Gets an InputStream for reading the content before the first local file header.
821     *
822     * @return null if there is no content before the first local file header.
823     * Otherwise, returns a stream to read the content before the first local file header.
824     * @since 1.23
825     */
826    public InputStream getContentBeforeFirstLocalFileHeader() {
827        return firstLocalFileHeaderOffset == 0
828                ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
829    }
830
831    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
832        final long s = ze.getDataOffset();
833        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
834            setDataOffset(ze);
835            return ze.getDataOffset();
836        }
837        return s;
838    }
839
840    /**
841     * Gets the encoding to use for file names and the file comment.
842     *
843     * @return null if using the platform's default character encoding.
844     */
845    public String getEncoding() {
846        return encoding;
847    }
848
849    /**
850     * Gets all entries.
851     *
852     * <p>Entries will be returned in the same order they appear
853     * within the archive's central directory.</p>
854     *
855     * @return all entries as {@link ZipArchiveEntry} instances
856     */
857    public Enumeration<ZipArchiveEntry> getEntries() {
858        return Collections.enumeration(entries);
859    }
860
861    /**
862     * Gets all named entries in the same order they appear within
863     * the archive's central directory.
864     *
865     * @param name name of the entry.
866     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
867     * given name
868     * @since 1.6
869     */
870    public Iterable<ZipArchiveEntry> getEntries(final String name) {
871        return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
872    }
873
874    /**
875     * Gets all entries in physical order.
876     *
877     * <p>Entries will be returned in the same order their contents
878     * appear within the archive.</p>
879     *
880     * @return all entries as {@link ZipArchiveEntry} instances
881     *
882     * @since 1.1
883     */
884    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
885        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
886        Arrays.sort(allEntries, offsetComparator);
887        return Collections.enumeration(Arrays.asList(allEntries));
888    }
889
890    /**
891     * Gets all named entries in the same order their contents
892     * appear within the archive.
893     *
894     * @param name name of the entry.
895     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
896     * given name
897     * @since 1.6
898     */
899    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
900        ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ARRAY;
901        final LinkedList<ZipArchiveEntry> linkedList = nameMap.get(name);
902        if (linkedList != null) {
903            entriesOfThatName = linkedList.toArray(entriesOfThatName);
904            Arrays.sort(entriesOfThatName, offsetComparator);
905        }
906        return Arrays.asList(entriesOfThatName);
907    }
908
909    /**
910     * Gets a named entry or {@code null} if no entry by
911     * that name exists.
912     *
913     * <p>If multiple entries with the same name exist the first entry
914     * in the archive's central directory by that name is
915     * returned.</p>
916     *
917     * @param name name of the entry.
918     * @return the ZipArchiveEntry corresponding to the given name - or
919     * {@code null} if not present.
920     */
921    public ZipArchiveEntry getEntry(final String name) {
922        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
923        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
924    }
925
926    /**
927     * Gets the offset of the first local file header in the file.
928     *
929     * @return the length of the content before the first local file header
930     * @since 1.23
931     */
932    public long getFirstLocalFileHeaderOffset() {
933        return firstLocalFileHeaderOffset;
934    }
935
936    /**
937     * Gets an InputStream for reading the contents of the given entry.
938     *
939     * @param zipEntry the entry to get the stream for.
940     * @return a stream to read the entry from. The returned stream
941     * implements {@link InputStreamStatistics}.
942     * @throws IOException if unable to create an input stream from the zipEntry.
943     */
944    public InputStream getInputStream(final ZipArchiveEntry zipEntry)
945        throws IOException {
946        if (!(zipEntry instanceof Entry)) {
947            return null;
948        }
949        // cast validity is checked just above
950        ZipUtil.checkRequestedFeatures(zipEntry);
951
952        // doesn't get closed if the method is not supported - which
953        // should never happen because of the checkRequestedFeatures
954        // call above
955        final InputStream is = new BufferedInputStream(getRawInputStream(zipEntry)); //NOSONAR
956        switch (ZipMethod.getMethodByCode(zipEntry.getMethod())) {
957            case STORED:
958                return new StoredStatisticsStream(is);
959            case UNSHRINKING:
960                return new UnshrinkingInputStream(is);
961            case IMPLODING:
962                try {
963                    return new ExplodingInputStream(zipEntry.getGeneralPurposeBit().getSlidingDictionarySize(),
964                            zipEntry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
965                } catch (final IllegalArgumentException ex) {
966                    throw new IOException("bad IMPLODE data", ex);
967                }
968            case DEFLATED:
969                final Inflater inflater = new Inflater(true);
970                // Inflater with nowrap=true has this odd contract for a zero padding
971                // byte following the data stream; this used to be zlib's requirement
972                // and has been fixed a long time ago, but the contract persists so
973                // we comply.
974                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
975                return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
976                    inflater) {
977                    @Override
978                    public void close() throws IOException {
979                        try {
980                            super.close();
981                        } finally {
982                            inflater.end();
983                        }
984                    }
985                };
986            case BZIP2:
987                return new BZip2CompressorInputStream(is);
988            case ENHANCED_DEFLATED:
989                return new Deflate64CompressorInputStream(is);
990            case AES_ENCRYPTED:
991            case EXPANDING_LEVEL_1:
992            case EXPANDING_LEVEL_2:
993            case EXPANDING_LEVEL_3:
994            case EXPANDING_LEVEL_4:
995            case JPEG:
996            case LZMA:
997            case PKWARE_IMPLODING:
998            case PPMD:
999            case TOKENIZATION:
1000            case UNKNOWN:
1001            case WAVPACK:
1002            case XZ:
1003            default:
1004                throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(zipEntry.getMethod()), zipEntry);
1005        }
1006    }
1007
1008    /**
1009     * Gets the raw stream of the archive entry (compressed form).
1010     *
1011     * <p>This method does not relate to how/if we understand the payload in the
1012     * stream, since we really only intend to move it on to somewhere else.</p>
1013     *
1014     * <p>Since version 1.22, this method will make an attempt to read the entry's data
1015     * stream offset, even if the {@code ignoreLocalFileHeader} parameter was {@code true}
1016     * in the constructor. An IOException can also be thrown from the body of the method
1017     * if this lookup fails for some reason.</p>
1018     *
1019     * @param ze The entry to get the stream for
1020     * @return The raw input stream containing (possibly) compressed data.
1021     * @since 1.11
1022     * @throws IOException if there is a problem reading data offset (added in version 1.22).
1023     */
1024    public InputStream getRawInputStream(final ZipArchiveEntry ze) throws IOException {
1025        if (!(ze instanceof Entry)) {
1026            return null;
1027        }
1028
1029        final long start = getDataOffset(ze);
1030        if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
1031            return null;
1032        }
1033        return createBoundedInputStream(start, ze.getCompressedSize());
1034    }
1035
1036    /**
1037     * Gets the entry's content as a String if isUnixSymlink()
1038     * returns true for it, otherwise returns null.
1039     * <p>This method assumes the symbolic link's file name uses the
1040     * same encoding that as been specified for this ZipFile.</p>
1041     *
1042     * @param entry ZipArchiveEntry object that represents the symbolic link
1043     * @return entry's content as a String
1044     * @throws IOException problem with content's input stream
1045     * @since 1.5
1046     */
1047    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
1048        if (entry != null && entry.isUnixSymlink()) {
1049            try (InputStream in = getInputStream(entry)) {
1050                return zipEncoding.decode(IOUtils.toByteArray(in));
1051            }
1052        }
1053        return null;
1054    }
1055
1056    /**
1057     * Reads the central directory of the given archive and populates
1058     * the internal tables with ZipArchiveEntry instances.
1059     *
1060     * <p>The ZipArchiveEntrys will know all data that can be obtained from
1061     * the central directory alone, but not the data that requires the
1062     * local file header or additional data to be read.</p>
1063     *
1064     * @return a map of zip entries that didn't have the language
1065     * encoding flag set when read.
1066     */
1067    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
1068        throws IOException {
1069        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
1070            new HashMap<>();
1071
1072        positionAtCentralDirectory();
1073        centralDirectoryStartOffset = archive.position();
1074
1075        wordBbuf.rewind();
1076        IOUtils.readFully(archive, wordBbuf);
1077        long sig = ZipLong.getValue(wordBuf);
1078
1079        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1080            throw new IOException("Central directory is empty, can't expand"
1081                                  + " corrupt archive.");
1082        }
1083
1084        while (sig == CFH_SIG) {
1085            readCentralDirectoryEntry(noUTF8Flag);
1086            wordBbuf.rewind();
1087            IOUtils.readFully(archive, wordBbuf);
1088            sig = ZipLong.getValue(wordBuf);
1089        }
1090        return noUTF8Flag;
1091    }
1092
1093    /**
1094     * Searches for either the &quot;Zip64 end of central directory
1095     * locator&quot; or the &quot;End of central dir record&quot;, parses
1096     * it and positions the stream at the first central directory
1097     * record.
1098     */
1099    private void positionAtCentralDirectory()
1100        throws IOException {
1101        positionAtEndOfCentralDirectoryRecord();
1102        boolean found = false;
1103        final boolean searchedForZip64EOCD =
1104            archive.position() > ZIP64_EOCDL_LENGTH;
1105        if (searchedForZip64EOCD) {
1106            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
1107            wordBbuf.rewind();
1108            IOUtils.readFully(archive, wordBbuf);
1109            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
1110                                  wordBuf);
1111        }
1112        if (!found) {
1113            // not a ZIP64 archive
1114            if (searchedForZip64EOCD) {
1115                skipBytes(ZIP64_EOCDL_LENGTH - ZipConstants.WORD);
1116            }
1117            positionAtCentralDirectory32();
1118        } else {
1119            positionAtCentralDirectory64();
1120        }
1121    }
1122
1123    /**
1124     * Parses the &quot;End of central dir record&quot; and positions
1125     * the stream at the first central directory record.
1126     *
1127     * Expects stream to be positioned at the beginning of the
1128     * &quot;End of central dir record&quot;.
1129     */
1130    private void positionAtCentralDirectory32()
1131        throws IOException {
1132        final long endOfCentralDirectoryRecordOffset = archive.position();
1133        if (isSplitZipArchive) {
1134            skipBytes(CFD_DISK_OFFSET);
1135            shortBbuf.rewind();
1136            IOUtils.readFully(archive, shortBbuf);
1137            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1138
1139            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1140
1141            wordBbuf.rewind();
1142            IOUtils.readFully(archive, wordBbuf);
1143            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1144            ((ZipSplitReadOnlySeekableByteChannel) archive)
1145                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1146        } else {
1147            skipBytes(CFD_LENGTH_OFFSET);
1148            wordBbuf.rewind();
1149            IOUtils.readFully(archive, wordBbuf);
1150            final long centralDirectoryLength = ZipLong.getValue(wordBuf);
1151
1152            wordBbuf.rewind();
1153            IOUtils.readFully(archive, wordBbuf);
1154            centralDirectoryStartDiskNumber = 0;
1155            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1156
1157            firstLocalFileHeaderOffset = Long.max(
1158                    endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset,
1159                    0L);
1160            archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
1161        }
1162    }
1163
1164    /**
1165     * Parses the &quot;Zip64 end of central directory locator&quot;,
1166     * finds the &quot;Zip64 end of central directory record&quot; using the
1167     * parsed information, parses that and positions the stream at the
1168     * first central directory record.
1169     *
1170     * Expects stream to be positioned right behind the &quot;Zip64
1171     * end of central directory locator&quot;'s signature.
1172     */
1173    private void positionAtCentralDirectory64()
1174        throws IOException {
1175        if (isSplitZipArchive) {
1176            wordBbuf.rewind();
1177            IOUtils.readFully(archive, wordBbuf);
1178            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1179
1180            dwordBbuf.rewind();
1181            IOUtils.readFully(archive, dwordBbuf);
1182            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1183            ((ZipSplitReadOnlySeekableByteChannel) archive)
1184                .position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1185        } else {
1186            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
1187                    - ZipConstants.WORD /* signature has already been read */);
1188            dwordBbuf.rewind();
1189            IOUtils.readFully(archive, dwordBbuf);
1190            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1191        }
1192
1193        wordBbuf.rewind();
1194        IOUtils.readFully(archive, wordBbuf);
1195        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1196            throw new ZipException("Archive's ZIP64 end of central "
1197                                   + "directory locator is corrupt.");
1198        }
1199
1200        if (isSplitZipArchive) {
1201            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET
1202                    - ZipConstants.WORD /* signature has already been read */);
1203            wordBbuf.rewind();
1204            IOUtils.readFully(archive, wordBbuf);
1205            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1206
1207            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1208
1209            dwordBbuf.rewind();
1210            IOUtils.readFully(archive, dwordBbuf);
1211            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1212            ((ZipSplitReadOnlySeekableByteChannel) archive)
1213                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1214        } else {
1215            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
1216                    - ZipConstants.WORD /* signature has already been read */);
1217            dwordBbuf.rewind();
1218            IOUtils.readFully(archive, dwordBbuf);
1219            centralDirectoryStartDiskNumber = 0;
1220            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1221            archive.position(centralDirectoryStartRelativeOffset);
1222        }
1223    }
1224
1225    /**
1226     * Searches for the and positions the stream at the start of the
1227     * &quot;End of central dir record&quot;.
1228     */
1229    private void positionAtEndOfCentralDirectoryRecord()
1230        throws IOException {
1231        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
1232                                             ZipArchiveOutputStream.EOCD_SIG);
1233        if (!found) {
1234            throw new ZipException("Archive is not a ZIP archive");
1235        }
1236    }
1237
1238    /**
1239     * Reads an individual entry of the central directory, creates an
1240     * ZipArchiveEntry from it and adds it to the global maps.
1241     *
1242     * @param noUTF8Flag map used to collect entries that don't have
1243     * their UTF-8 flag set and whose name will be set by data read
1244     * from the local file header later.  The current entry may be
1245     * added to this map.
1246     */
1247    private void
1248        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
1249        throws IOException {
1250        cfhBbuf.rewind();
1251        IOUtils.readFully(archive, cfhBbuf);
1252        int off = 0;
1253        final Entry ze = new Entry();
1254
1255        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
1256        off += ZipConstants.SHORT;
1257        ze.setVersionMadeBy(versionMadeBy);
1258        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
1259
1260        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
1261        off += ZipConstants.SHORT; // version required
1262
1263        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
1264        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
1265        final ZipEncoding entryEncoding =
1266            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
1267        if (hasUTF8Flag) {
1268            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
1269        }
1270        ze.setGeneralPurposeBit(gpFlag);
1271        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
1272
1273        off += ZipConstants.SHORT;
1274
1275        //noinspection MagicConstant
1276        ze.setMethod(ZipShort.getValue(cfhBuf, off));
1277        off += ZipConstants.SHORT;
1278
1279        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
1280        ze.setTime(time);
1281        off += ZipConstants.WORD;
1282
1283        ze.setCrc(ZipLong.getValue(cfhBuf, off));
1284        off += ZipConstants.WORD;
1285
1286        long size = ZipLong.getValue(cfhBuf, off);
1287        if (size < 0) {
1288            throw new IOException("broken archive, entry with negative compressed size");
1289        }
1290        ze.setCompressedSize(size);
1291        off += ZipConstants.WORD;
1292
1293        size = ZipLong.getValue(cfhBuf, off);
1294        if (size < 0) {
1295            throw new IOException("broken archive, entry with negative size");
1296        }
1297        ze.setSize(size);
1298        off += ZipConstants.WORD;
1299
1300        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
1301        off += ZipConstants.SHORT;
1302        if (fileNameLen < 0) {
1303            throw new IOException("broken archive, entry with negative fileNameLen");
1304        }
1305
1306        final int extraLen = ZipShort.getValue(cfhBuf, off);
1307        off += ZipConstants.SHORT;
1308        if (extraLen < 0) {
1309            throw new IOException("broken archive, entry with negative extraLen");
1310        }
1311
1312        final int commentLen = ZipShort.getValue(cfhBuf, off);
1313        off += ZipConstants.SHORT;
1314        if (commentLen < 0) {
1315            throw new IOException("broken archive, entry with negative commentLen");
1316        }
1317
1318        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
1319        off += ZipConstants.SHORT;
1320
1321        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
1322        off += ZipConstants.SHORT;
1323
1324        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
1325        off += ZipConstants.WORD;
1326
1327        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
1328        if (fileName.length < fileNameLen) {
1329            throw new EOFException();
1330        }
1331        ze.setName(entryEncoding.decode(fileName), fileName);
1332
1333        // LFH offset,
1334        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
1335        // data offset will be filled later
1336        entries.add(ze);
1337
1338        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
1339        if (cdExtraData.length < extraLen) {
1340            throw new EOFException();
1341        }
1342        try {
1343            ze.setCentralDirectoryExtra(cdExtraData);
1344        } catch (final RuntimeException ex) {
1345            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1346            z.initCause(ex);
1347            throw z;
1348        }
1349
1350        setSizesAndOffsetFromZip64Extra(ze);
1351        sanityCheckLFHOffset(ze);
1352
1353        final byte[] comment = IOUtils.readRange(archive, commentLen);
1354        if (comment.length < commentLen) {
1355            throw new EOFException();
1356        }
1357        ze.setComment(entryEncoding.decode(comment));
1358
1359        if (!hasUTF8Flag && useUnicodeExtraFields) {
1360            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
1361        }
1362
1363        ze.setStreamContiguous(true);
1364    }
1365
1366    /**
1367     * Walks through all recorded entries and adds the data available
1368     * from the local file header.
1369     *
1370     * <p>Also records the offsets for the data to read from the
1371     * entries.</p>
1372     */
1373    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1374                                            entriesWithoutUTF8Flag)
1375        throws IOException {
1376        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1377            // entries are filled in populateFromCentralDirectory and
1378            // never modified
1379            final Entry ze = (Entry) zipArchiveEntry;
1380            final int[] lens = setDataOffset(ze);
1381            final int fileNameLen = lens[0];
1382            final int extraFieldLen = lens[1];
1383            skipBytes(fileNameLen);
1384            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1385            if (localExtraData.length < extraFieldLen) {
1386                throw new EOFException();
1387            }
1388            try {
1389                ze.setExtra(localExtraData);
1390            } catch (final RuntimeException ex) {
1391                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1392                z.initCause(ex);
1393                throw z;
1394            }
1395
1396            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1397                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1398                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1399                                                         nc.comment);
1400            }
1401        }
1402    }
1403
1404    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
1405        if (ze.getDiskNumberStart() < 0) {
1406            throw new IOException("broken archive, entry with negative disk number");
1407        }
1408        if (ze.getLocalHeaderOffset() < 0) {
1409            throw new IOException("broken archive, entry with negative local file header offset");
1410        }
1411        if (isSplitZipArchive) {
1412            if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1413                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
1414            }
1415            if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber
1416                && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1417                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
1418            }
1419        } else if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1420            throw new IOException("local file header for " + ze.getName() + " starts after central directory");
1421        }
1422    }
1423
1424    private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException {
1425        long offset = ze.getLocalHeaderOffset();
1426        if (isSplitZipArchive) {
1427            ((ZipSplitReadOnlySeekableByteChannel) archive)
1428                .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1429            // the offset should be updated to the global offset
1430            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1431        } else {
1432            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1433        }
1434        wordBbuf.rewind();
1435        IOUtils.readFully(archive, wordBbuf);
1436        wordBbuf.flip();
1437        wordBbuf.get(shortBuf);
1438        final int fileNameLen = ZipShort.getValue(shortBuf);
1439        wordBbuf.get(shortBuf);
1440        final int extraFieldLen = ZipShort.getValue(shortBuf);
1441        ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1442                         + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
1443        if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) {
1444            throw new IOException("data for " + ze.getName() + " overlaps with central directory.");
1445        }
1446        return new int[] { fileNameLen, extraFieldLen };
1447    }
1448
1449    /**
1450     * If the entry holds a Zip64 extended information extra field,
1451     * read sizes from there if the entry's sizes are set to
1452     * 0xFFFFFFFFF, do the same for the offset of the local file
1453     * header.
1454     *
1455     * <p>Ensures the Zip64 extra either knows both compressed and
1456     * uncompressed size or neither of both as the internal logic in
1457     * ExtraFieldUtils forces the field to create local header data
1458     * even if they are never used - and here a field with only one
1459     * size would be invalid.</p>
1460     */
1461    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze)
1462        throws IOException {
1463        final ZipExtraField extra =
1464            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1465        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1466            throw new ZipException("archive contains unparseable zip64 extra field");
1467        }
1468        final Zip64ExtendedInformationExtraField z64 =
1469            (Zip64ExtendedInformationExtraField) extra;
1470        if (z64 != null) {
1471            final boolean hasUncompressedSize = ze.getSize() == ZipConstants.ZIP64_MAGIC;
1472            final boolean hasCompressedSize = ze.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1473            final boolean hasRelativeHeaderOffset =
1474                ze.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1475            final boolean hasDiskStart = ze.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1476            z64.reparseCentralDirectoryData(hasUncompressedSize,
1477                                            hasCompressedSize,
1478                                            hasRelativeHeaderOffset,
1479                                            hasDiskStart);
1480
1481            if (hasUncompressedSize) {
1482                final long size = z64.getSize().getLongValue();
1483                if (size < 0) {
1484                    throw new IOException("broken archive, entry with negative size");
1485                }
1486                ze.setSize(size);
1487            } else if (hasCompressedSize) {
1488                z64.setSize(new ZipEightByteInteger(ze.getSize()));
1489            }
1490
1491            if (hasCompressedSize) {
1492                final long size = z64.getCompressedSize().getLongValue();
1493                if (size < 0) {
1494                    throw new IOException("broken archive, entry with negative compressed size");
1495                }
1496                ze.setCompressedSize(size);
1497            } else if (hasUncompressedSize) {
1498                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
1499            }
1500
1501            if (hasRelativeHeaderOffset) {
1502                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1503            }
1504
1505            if (hasDiskStart) {
1506                ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1507            }
1508        }
1509    }
1510
1511    /**
1512     * Skips the given number of bytes or throws an EOFException if
1513     * skipping failed.
1514     */
1515    private void skipBytes(final int count) throws IOException {
1516        final long currentPosition = archive.position();
1517        final long newPosition = currentPosition + count;
1518        if (newPosition > archive.size()) {
1519            throw new EOFException();
1520        }
1521        archive.position(newPosition);
1522    }
1523
1524    /**
1525     * Checks whether the archive starts with an LFH. If it doesn't,
1526     * it may be an empty archive.
1527     */
1528    private boolean startsWithLocalFileHeader() throws IOException {
1529        archive.position(firstLocalFileHeaderOffset);
1530        wordBbuf.rewind();
1531        IOUtils.readFully(archive, wordBbuf);
1532        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1533    }
1534
1535    /**
1536     * Searches the archive backwards from minDistance to maxDistance
1537     * for the given signature, positions the RandomaccessFile right
1538     * at the signature if it has been found.
1539     */
1540    private boolean tryToLocateSignature(final long minDistanceFromEnd,
1541                                         final long maxDistanceFromEnd,
1542                                         final byte[] sig) throws IOException {
1543        boolean found = false;
1544        long off = archive.size() - minDistanceFromEnd;
1545        final long stopSearching =
1546            Math.max(0L, archive.size() - maxDistanceFromEnd);
1547        if (off >= 0) {
1548            for (; off >= stopSearching; off--) {
1549                archive.position(off);
1550                try {
1551                    wordBbuf.rewind();
1552                    IOUtils.readFully(archive, wordBbuf);
1553                    wordBbuf.flip();
1554                } catch (final EOFException ex) { // NOSONAR
1555                    break;
1556                }
1557                int curr = wordBbuf.get();
1558                if (curr == sig[POS_0]) {
1559                    curr = wordBbuf.get();
1560                    if (curr == sig[POS_1]) {
1561                        curr = wordBbuf.get();
1562                        if (curr == sig[POS_2]) {
1563                            curr = wordBbuf.get();
1564                            if (curr == sig[POS_3]) {
1565                                found = true;
1566                                break;
1567                            }
1568                        }
1569                    }
1570                }
1571            }
1572        }
1573        if (found) {
1574            archive.position(off);
1575        }
1576        return found;
1577    }
1578}