001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.File; 021import java.io.IOException; 022import java.io.Serializable; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.file.Files; 026import java.nio.file.Path; 027import java.nio.file.StandardOpenOption; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Comparator; 031import java.util.List; 032import java.util.Objects; 033import java.util.regex.Pattern; 034import java.util.stream.Collectors; 035import java.util.stream.Stream; 036 037import org.apache.commons.compress.archivers.ArchiveStreamFactory; 038import org.apache.commons.compress.utils.FileNameUtils; 039import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel; 040 041/** 042 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like. 043 * 044 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of 045 * the archive.</p> 046 * 047 * @since 1.20 048 */ 049public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel { 050 051 private static class ZipSplitSegmentComparator implements Comparator<Path>, Serializable { 052 private static final long serialVersionUID = 20200123L; 053 054 @Override 055 public int compare(final Path file1, final Path file2) { 056 final String extension1 = FileNameUtils.getExtension(file1); 057 final String extension2 = FileNameUtils.getExtension(file2); 058 059 if (!extension1.startsWith("z")) { 060 return -1; 061 } 062 063 if (!extension2.startsWith("z")) { 064 return 1; 065 } 066 067 final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1)); 068 final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1)); 069 070 return splitSegmentNumber1.compareTo(splitSegmentNumber2); 071 } 072 } 073 private static final Path[] EMPTY_PATH_ARRAY = {}; 074 private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4; 075 076 /** 077 * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip) 078 * 079 * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip 080 * @return SeekableByteChannel that concatenates all ZIP split files 081 * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip 082 * @throws IOException if the first channel doesn't seem to hold 083 * the beginning of a split archive 084 */ 085 public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException { 086 return buildFromLastSplitSegment(lastSegmentFile.toPath()); 087 } 088 089 /** 090 * Concatenates ZIP split files from the last segment (the extension MUST be .zip) 091 * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip 092 * @return SeekableByteChannel that concatenates all ZIP split files 093 * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip 094 * @throws IOException if the first channel doesn't seem to hold 095 * the beginning of a split archive 096 * @since 1.22 097 */ 098 public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException { 099 final String extension = FileNameUtils.getExtension(lastSegmentPath); 100 if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) { 101 throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip"); 102 } 103 104 final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() 105 : lastSegmentPath.getFileSystem().getPath("."); 106 final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath); 107 final ArrayList<Path> splitZipSegments; 108 109 // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification 110 final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+"); 111 try (Stream<Path> walk = Files.walk(parent, 1)) { 112 splitZipSegments = walk 113 .filter(Files::isRegularFile) 114 .filter(path -> pattern.matcher(path.getFileName().toString()).matches()) 115 .sorted(new ZipSplitSegmentComparator()) 116 .collect(Collectors.toCollection(ArrayList::new)); 117 } 118 119 return forPaths(lastSegmentPath, splitZipSegments); 120 } 121 122 /** 123 * Concatenates the given files. 124 * 125 * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) 126 * and these files should be added in correct order (e.g. .z01, .z02... .z99, .zip) 127 * @return SeekableByteChannel that concatenates all provided files 128 * @throws NullPointerException if files is null 129 * @throws IOException if opening a channel for one of the files fails 130 * @throws IOException if the first channel doesn't seem to hold 131 * the beginning of a split archive 132 */ 133 public static SeekableByteChannel forFiles(final File... files) throws IOException { 134 final List<Path> paths = new ArrayList<>(); 135 for (final File f : Objects.requireNonNull(files, "files must not be null")) { 136 paths.add(f.toPath()); 137 } 138 139 return forPaths(paths.toArray(EMPTY_PATH_ARRAY)); 140 } 141 142 /** 143 * Concatenates the given files. 144 * 145 * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip 146 * @param files the files to concatenate except for the last segment, 147 * note these files should be added in correct order (e.g. .z01, .z02... .z99) 148 * @return SeekableByteChannel that concatenates all provided files 149 * @throws IOException if the first channel doesn't seem to hold 150 * the beginning of a split archive 151 * @throws NullPointerException if files or lastSegmentFile is null 152 */ 153 public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException { 154 Objects.requireNonNull(files, "files"); 155 Objects.requireNonNull(lastSegmentFile, "lastSegmentFile"); 156 157 final List<Path> filesList = new ArrayList<>(); 158 files.forEach(f -> filesList.add(f.toPath())); 159 160 return forPaths(lastSegmentFile.toPath(), filesList); 161 } 162 163 /** 164 * Concatenates the given channels. 165 * 166 * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) 167 * and these channels should be added in correct order (e.g. .z01, .z02... .z99, .zip) 168 * @return SeekableByteChannel that concatenates all provided channels 169 * @throws NullPointerException if channels is null 170 * @throws IOException if reading channels fails 171 */ 172 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException { 173 if (Objects.requireNonNull(channels, "channels must not be null").length == 1) { 174 return channels[0]; 175 } 176 return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels)); 177 } 178 179 /** 180 * Concatenates the given channels. 181 * 182 * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip 183 * @param channels the channels to concatenate except for the last segment, 184 * note these channels should be added in correct order (e.g. .z01, .z02... .z99) 185 * @return SeekableByteChannel that concatenates all provided channels 186 * @throws NullPointerException if lastSegmentChannel or channels is null 187 * @throws IOException if the first channel doesn't seem to hold 188 * the beginning of a split archive 189 */ 190 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, 191 final Iterable<SeekableByteChannel> channels) throws IOException { 192 Objects.requireNonNull(channels, "channels"); 193 Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel"); 194 195 final List<SeekableByteChannel> channelsList = new ArrayList<>(); 196 channels.forEach(channelsList::add); 197 channelsList.add(lastSegmentChannel); 198 199 return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0])); 200 } 201 202 /** 203 * Concatenates the given file paths. 204 * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) 205 * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip) 206 * @return SeekableByteChannel that concatenates all provided files 207 * @throws NullPointerException if files is null 208 * @throws IOException if opening a channel for one of the files fails 209 * @throws IOException if the first channel doesn't seem to hold 210 * the beginning of a split archive 211 * @since 1.22 212 */ 213 public static SeekableByteChannel forPaths(final Path... paths) throws IOException { 214 final List<SeekableByteChannel> channels = new ArrayList<>(); 215 for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) { 216 channels.add(Files.newByteChannel(path, StandardOpenOption.READ)); 217 } 218 if (channels.size() == 1) { 219 return channels.get(0); 220 } 221 return new ZipSplitReadOnlySeekableByteChannel(channels); 222 } 223 224 /** 225 * Concatenates the given file paths. 226 * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip 227 * @param paths the file paths to concatenate except for the last segment, 228 * note these files should be added in correct order (e.g.: .z01, .z02... .z99) 229 * @return SeekableByteChannel that concatenates all provided files 230 * @throws IOException if the first channel doesn't seem to hold 231 * the beginning of a split archive 232 * @throws NullPointerException if files or lastSegmentPath is null 233 * @since 1.22 234 */ 235 public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException { 236 Objects.requireNonNull(paths, "paths"); 237 Objects.requireNonNull(lastSegmentPath, "lastSegmentPath"); 238 239 final List<Path> filesList = new ArrayList<>(); 240 paths.forEach(filesList::add); 241 filesList.add(lastSegmentPath); 242 243 return forPaths(filesList.toArray(EMPTY_PATH_ARRAY)); 244 } 245 246 private final ByteBuffer zipSplitSignatureByteBuffer = 247 ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH); 248 249 /** 250 * Concatenates the given channels. 251 * 252 * <p>The channels should be add in ascending order, e.g. z01, 253 * z02, ... z99, ZIP please note that the .zip file is the last 254 * segment and should be added as the last one in the channels</p> 255 * 256 * @param channels the channels to concatenate 257 * @throws NullPointerException if channels is null 258 * @throws IOException if the first channel doesn't seem to hold 259 * the beginning of a split archive 260 */ 261 public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) 262 throws IOException { 263 super(channels); 264 265 // the first split ZIP segment should begin with ZIP split signature 266 assertSplitSignature(channels); 267 } 268 269 /** 270 * Based on the ZIP specification: 271 * 272 * <p> 273 * 8.5.3 Spanned/Split archives created using PKZIP for Windows 274 * (V2.50 or greater), PKZIP Command Line (V2.50 or greater), 275 * or PKZIP Explorer will include a special spanning 276 * signature as the first 4 bytes of the first segment of 277 * the archive. This signature (0x08074b50) will be 278 * followed immediately by the local header signature for 279 * the first file in the archive. 280 * 281 * <p> 282 * the first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50) 283 * 284 * @param channels channels to be validated 285 * @throws IOException 286 */ 287 private void assertSplitSignature(final List<SeekableByteChannel> channels) 288 throws IOException { 289 final SeekableByteChannel channel = channels.get(0); 290 // the ZIP split file signature is at the beginning of the first split segment 291 channel.position(0L); 292 293 zipSplitSignatureByteBuffer.rewind(); 294 channel.read(zipSplitSignatureByteBuffer); 295 final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array()); 296 if (!signature.equals(ZipLong.DD_SIG)) { 297 channel.position(0L); 298 throw new IOException("The first ZIP split segment does not begin with split ZIP file signature"); 299 } 300 301 channel.position(0L); 302 } 303}