001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.arj;
019
020import java.io.ByteArrayInputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.DataInputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.ArrayList;
027import java.util.zip.CRC32;
028
029import org.apache.commons.compress.archivers.ArchiveEntry;
030import org.apache.commons.compress.archivers.ArchiveException;
031import org.apache.commons.compress.archivers.ArchiveInputStream;
032import org.apache.commons.compress.utils.BoundedInputStream;
033import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
034import org.apache.commons.compress.utils.IOUtils;
035
036/**
037 * Implements the "arj" archive format as an InputStream.
038 * <p>
039 * <a href="https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt">Reference 1</a>
040 * <br>
041 * <a href="http://www.fileformat.info/format/arj/corion.htm">Reference 2</a>
042 * @NotThreadSafe
043 * @since 1.6
044 */
045public class ArjArchiveInputStream extends ArchiveInputStream {
    // First byte of the two-byte ARJ header signature (checked by matches() and readHeader()).
    private static final int ARJ_MAGIC_1 = 0x60;
    // Second byte of the two-byte ARJ header signature.
    private static final int ARJ_MAGIC_2 = 0xEA;
    // The wrapped archive stream; all header and entry data is read through it.
    private final DataInputStream in;
    // Charset used to decode names and comments; null selects the platform default.
    private final String charsetName;
    // Archive-level header, read once by the constructor.
    private final MainHeader mainHeader;
    // Header of the entry returned by the most recent getNextEntry() call, or null if there is none.
    private LocalFileHeader currentLocalFileHeader;
    // Stream limited to the current entry's data, or null if there is no current entry.
    private InputStream currentInputStream;
053
054    /**
055     * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in.
056     * @param inputStream the underlying stream, whose ownership is taken
057     * @param charsetName the charset used for file names and comments
058     *   in the archive. May be {@code null} to use the platform default.
059     * @throws ArchiveException if an exception occurs while reading
060     */
061    public ArjArchiveInputStream(final InputStream inputStream,
062            final String charsetName) throws ArchiveException {
063        in = new DataInputStream(inputStream);
064        this.charsetName = charsetName;
065        try {
066            mainHeader = readMainHeader();
067            if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) {
068                throw new ArchiveException("Encrypted ARJ files are unsupported");
069            }
070            if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) {
071                throw new ArchiveException("Multi-volume ARJ files are unsupported");
072            }
073        } catch (final IOException ioException) {
074            throw new ArchiveException(ioException.getMessage(), ioException);
075        }
076    }
077
    /**
     * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in,
     * and using the CP437 character encoding for file names and comments.
     * <p>
     * This is a convenience constructor that delegates to
     * {@link #ArjArchiveInputStream(InputStream, String)} with the charset name {@code "CP437"}.
     * </p>
     * @param inputStream the underlying stream, whose ownership is taken
     * @throws ArchiveException if an exception occurs while reading
     */
    public ArjArchiveInputStream(final InputStream inputStream)
            throws ArchiveException {
        // delegate with the fixed default charset
        this(inputStream, "CP437");
    }
088
089    @Override
090    public void close() throws IOException {
091        in.close();
092    }
093
094    private int read8(final DataInputStream dataIn) throws IOException {
095        final int value = dataIn.readUnsignedByte();
096        count(1);
097        return value;
098    }
099
100    private int read16(final DataInputStream dataIn) throws IOException {
101        final int value = dataIn.readUnsignedShort();
102        count(2);
103        return Integer.reverseBytes(value) >>> 16;
104    }
105
106    private int read32(final DataInputStream dataIn) throws IOException {
107        final int value = dataIn.readInt();
108        count(4);
109        return Integer.reverseBytes(value);
110    }
111
112    private String readString(final DataInputStream dataIn) throws IOException {
113        try (final ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
114            int nextByte;
115            while ((nextByte = dataIn.readUnsignedByte()) != 0) {
116                buffer.write(nextByte);
117            }
118            if (charsetName != null) {
119                return buffer.toString(charsetName);
120            }
121            // intentionally using the default encoding as that's the contract for a null charsetName
122            return buffer.toString();
123        }
124    }
125
126    private byte[] readRange(final InputStream in, final int len)
127        throws IOException {
128        final byte[] b = IOUtils.readRange(in, len);
129        count(b.length);
130        if (b.length < len) {
131            throw new EOFException();
132        }
133        return b;
134    }
135
136    private byte[] readHeader() throws IOException {
137        boolean found = false;
138        byte[] basicHeaderBytes = null;
139        do {
140            int first = 0;
141            int second = read8(in);
142            do {
143                first = second;
144                second = read8(in);
145            } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2);
146            final int basicHeaderSize = read16(in);
147            if (basicHeaderSize == 0) {
148                // end of archive
149                return null;
150            }
151            if (basicHeaderSize <= 2600) {
152                basicHeaderBytes = readRange(in, basicHeaderSize);
153                final long basicHeaderCrc32 = read32(in) & 0xFFFFFFFFL;
154                final CRC32 crc32 = new CRC32();
155                crc32.update(basicHeaderBytes);
156                if (basicHeaderCrc32 == crc32.getValue()) {
157                    found = true;
158                }
159            }
160        } while (!found);
161        return basicHeaderBytes;
162    }
163
164    private MainHeader readMainHeader() throws IOException {
165        final byte[] basicHeaderBytes = readHeader();
166        if (basicHeaderBytes == null) {
167            throw new IOException("Archive ends without any headers");
168        }
169        final DataInputStream basicHeader = new DataInputStream(
170                new ByteArrayInputStream(basicHeaderBytes));
171
172        final int firstHeaderSize = basicHeader.readUnsignedByte();
173        final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
174        pushedBackBytes(firstHeaderBytes.length);
175
176        final DataInputStream firstHeader = new DataInputStream(
177                new ByteArrayInputStream(firstHeaderBytes));
178
179        final MainHeader hdr = new MainHeader();
180        hdr.archiverVersionNumber = firstHeader.readUnsignedByte();
181        hdr.minVersionToExtract = firstHeader.readUnsignedByte();
182        hdr.hostOS = firstHeader.readUnsignedByte();
183        hdr.arjFlags = firstHeader.readUnsignedByte();
184        hdr.securityVersion = firstHeader.readUnsignedByte();
185        hdr.fileType = firstHeader.readUnsignedByte();
186        hdr.reserved = firstHeader.readUnsignedByte();
187        hdr.dateTimeCreated = read32(firstHeader);
188        hdr.dateTimeModified = read32(firstHeader);
189        hdr.archiveSize = 0xffffFFFFL & read32(firstHeader);
190        hdr.securityEnvelopeFilePosition = read32(firstHeader);
191        hdr.fileSpecPosition = read16(firstHeader);
192        hdr.securityEnvelopeLength = read16(firstHeader);
193        pushedBackBytes(20); // count has already counted them via readRange
194        hdr.encryptionVersion = firstHeader.readUnsignedByte();
195        hdr.lastChapter = firstHeader.readUnsignedByte();
196
197        if (firstHeaderSize >= 33) {
198            hdr.arjProtectionFactor = firstHeader.readUnsignedByte();
199            hdr.arjFlags2 = firstHeader.readUnsignedByte();
200            firstHeader.readUnsignedByte();
201            firstHeader.readUnsignedByte();
202        }
203
204        hdr.name = readString(basicHeader);
205        hdr.comment = readString(basicHeader);
206
207        final  int extendedHeaderSize = read16(in);
208        if (extendedHeaderSize > 0) {
209            hdr.extendedHeaderBytes = readRange(in, extendedHeaderSize);
210            final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in);
211            final CRC32 crc32 = new CRC32();
212            crc32.update(hdr.extendedHeaderBytes);
213            if (extendedHeaderCrc32 != crc32.getValue()) {
214                throw new IOException("Extended header CRC32 verification failure");
215            }
216        }
217
218        return hdr;
219    }
220
221    private LocalFileHeader readLocalFileHeader() throws IOException {
222        final byte[] basicHeaderBytes = readHeader();
223        if (basicHeaderBytes == null) {
224            return null;
225        }
226        try (final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) {
227
228            final int firstHeaderSize = basicHeader.readUnsignedByte();
229            final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
230            pushedBackBytes(firstHeaderBytes.length);
231            try (final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) {
232
233                final LocalFileHeader localFileHeader = new LocalFileHeader();
234                localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte();
235                localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte();
236                localFileHeader.hostOS = firstHeader.readUnsignedByte();
237                localFileHeader.arjFlags = firstHeader.readUnsignedByte();
238                localFileHeader.method = firstHeader.readUnsignedByte();
239                localFileHeader.fileType = firstHeader.readUnsignedByte();
240                localFileHeader.reserved = firstHeader.readUnsignedByte();
241                localFileHeader.dateTimeModified = read32(firstHeader);
242                localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader);
243                localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader);
244                localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader);
245                localFileHeader.fileSpecPosition = read16(firstHeader);
246                localFileHeader.fileAccessMode = read16(firstHeader);
247                pushedBackBytes(20);
248                localFileHeader.firstChapter = firstHeader.readUnsignedByte();
249                localFileHeader.lastChapter = firstHeader.readUnsignedByte();
250
251                readExtraData(firstHeaderSize, firstHeader, localFileHeader);
252
253                localFileHeader.name = readString(basicHeader);
254                localFileHeader.comment = readString(basicHeader);
255
256                final ArrayList<byte[]> extendedHeaders = new ArrayList<>();
257                int extendedHeaderSize;
258                while ((extendedHeaderSize = read16(in)) > 0) {
259                    final byte[] extendedHeaderBytes = readRange(in, extendedHeaderSize);
260                    final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in);
261                    final CRC32 crc32 = new CRC32();
262                    crc32.update(extendedHeaderBytes);
263                    if (extendedHeaderCrc32 != crc32.getValue()) {
264                        throw new IOException("Extended header CRC32 verification failure");
265                    }
266                    extendedHeaders.add(extendedHeaderBytes);
267                }
268                localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[0][]);
269
270                return localFileHeader;
271            }
272        }
273    }
274
275    private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader,
276                               final LocalFileHeader localFileHeader) throws IOException {
277        if (firstHeaderSize >= 33) {
278            localFileHeader.extendedFilePosition = read32(firstHeader);
279            if (firstHeaderSize >= 45) {
280                localFileHeader.dateTimeAccessed = read32(firstHeader);
281                localFileHeader.dateTimeCreated = read32(firstHeader);
282                localFileHeader.originalSizeEvenForVolumes = read32(firstHeader);
283                pushedBackBytes(12);
284            }
285            pushedBackBytes(4);
286        }
287    }
288
289    /**
290     * Checks if the signature matches what is expected for an arj file.
291     *
292     * @param signature
293     *            the bytes to check
294     * @param length
295     *            the number of bytes to check
296     * @return true, if this stream is an arj archive stream, false otherwise
297     */
298    public static boolean matches(final byte[] signature, final int length) {
299        return length >= 2 &&
300                (0xff & signature[0]) == ARJ_MAGIC_1 &&
301                (0xff & signature[1]) == ARJ_MAGIC_2;
302    }
303
304    /**
305     * Gets the archive's recorded name.
306     * @return the archive's name
307     */
308    public String getArchiveName() {
309        return mainHeader.name;
310    }
311
312    /**
313     * Gets the archive's comment.
314     * @return the archive's comment
315     */
316    public String getArchiveComment() {
317        return mainHeader.comment;
318    }
319
320    @Override
321    public ArjArchiveEntry getNextEntry() throws IOException {
322        if (currentInputStream != null) {
323            // return value ignored as IOUtils.skip ensures the stream is drained completely
324            IOUtils.skip(currentInputStream, Long.MAX_VALUE);
325            currentInputStream.close();
326            currentLocalFileHeader = null;
327            currentInputStream = null;
328        }
329
330        currentLocalFileHeader = readLocalFileHeader();
331        if (currentLocalFileHeader != null) {
332            currentInputStream = new BoundedInputStream(in, currentLocalFileHeader.compressedSize);
333            if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) {
334                currentInputStream = new CRC32VerifyingInputStream(currentInputStream,
335                        currentLocalFileHeader.originalSize, currentLocalFileHeader.originalCrc32);
336            }
337            return new ArjArchiveEntry(currentLocalFileHeader);
338        }
339        currentInputStream = null;
340        return null;
341    }
342
343    @Override
344    public boolean canReadEntryData(final ArchiveEntry ae) {
345        return ae instanceof ArjArchiveEntry
346            && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED;
347    }
348
349    @Override
350    public int read(final byte[] b, final int off, final int len) throws IOException {
351        if (len == 0) {
352            return 0;
353        }
354        if (currentLocalFileHeader == null) {
355            throw new IllegalStateException("No current arj entry");
356        }
357        if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) {
358            throw new IOException("Unsupported compression method " + currentLocalFileHeader.method);
359        }
360        return currentInputStream.read(b, off, len);
361    }
362}