001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.math.BigInteger; 028import java.nio.ByteBuffer; 029import java.util.Arrays; 030import java.util.zip.CRC32; 031import java.util.zip.DataFormatException; 032import java.util.zip.Inflater; 033import java.util.zip.ZipEntry; 034import java.util.zip.ZipException; 035 036import org.apache.commons.compress.archivers.ArchiveEntry; 037import org.apache.commons.compress.archivers.ArchiveInputStream; 038import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 039import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 040import org.apache.commons.compress.utils.ArchiveUtils; 041import org.apache.commons.compress.utils.IOUtils; 042import org.apache.commons.compress.utils.InputStreamStatistics; 043 044import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 045import static 
org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 *  <li>may return entries that are not part of the central directory
 *  at all and shouldn't be considered part of the archive.</li>
 *
 *  <li>may return several entries with the same name.</li>
 *
 *  <li>will not return internal or external attributes.</li>
 *
 *  <li>may return incomplete extra field data.</li>
 *
 *  <li>may return unknown sizes and CRC values for entries until the
 *  next entry has been reached if the archive uses the data
 *  descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {

    /** The zip encoding to use for file names and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries.
*/
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    /** Length of a "local file header" record - the fixed part before name and extra data. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length of a "central file header" record - the fixed part before the variable-length fields. */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    /** 2^32 - used when guessing how many bytes the Inflater really consumed, see getBytesInflated(). */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    // number of entries handed out by getNextZipEntry so far
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // a pushback buffer as big as the read buffer so a whole buffer's
        // worth of over-read bytes can always be unread
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next entry's local file header and returns the entry
     * it describes.
     *
     * <p>Returns {@code null} once the stream is closed, the central
     * directory has been reached or EOF is hit while looking for the
     * next local file header.</p>
     *
     * @return the next entry or {@code null} if there are no more
     * @throws IOException if the stream cannot be read or an
     * unexpected record signature is encountered
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            // position the stream at the start of the next entry first
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) { //NOSONAR
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (!sig.equals(ZipLong.LFH_SIG)) {
            // central directory, archive-extra-data record or APK signing
            // block mean we are past the last entry
            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
                hitCentralDirectory = true;
                skipRemainderOfArchive();
                return null;
            }
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        // CRC and sizes are only meaningful in the LFH if no data
        // descriptor is used - otherwise they are read from the data
        // descriptor after the entry data
        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        // may overwrite the sizes parsed above with 8-byte Zip64 values
        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        // for methods other than STORED/DEFLATED wrap the raw stream in
        // the matching decompressing stream right away; STORED and
        // DEFLATED are handled inside read() itself
        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            // ENHANCED_DEFLATED is the only supported method that works
            // without knowing the compressed size up front
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            // a real split archive - cannot be read sequentially
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift the buffer contents left by the 4 marker bytes and
            // append the 4 freshly read bytes so lfh again holds a
            // complete local file header
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                    && (ZipLong.ZIP64_MAGIC.equals(cSize) || ZipLong.ZIP64_MAGIC.equals(size)) ) {
                // the LFH sizes are the 0xFFFFFFFF placeholder - the real
                // 8-byte values live in the Zip64 extra field
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else if (cSize != null && size != null) {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    /** Delegates to {@link #getNextZipEntry()}. */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze)
                && supportsCompressedSizeFor(ze);
        }
        return false;
    }

    /**
     * Reads decompressed bytes of the current entry into the given
     * buffer, dispatching on the entry's compression method.
     *
     * @return the number of bytes read or -1 at end of entry data
     * @throws IOException if the stream is closed or the entry uses an
     * unsupported feature
     */
    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }
        if (!supportsCompressedSizeFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            // these methods were wrapped with a decompressing stream in
            // getNextZipEntry
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            // maintain CRC and statistics for the current entry
            current.crc.update(buffer, offset, read);
            uncompressedCount += read;
        }

        return read;
    }

    /**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            return current.bytesRead;
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            return getBytesInflated();
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
            return ((UnshrinkingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            return ((ExplodingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
        } else {
            // unsupported method - no meaningful count available
            return -1;
        }
    }

    /**
     * @since 1.17
     */
    @Override
    public long getUncompressedCount() {
        return uncompressedCount;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // the size is only known after the data - read and cache the
            // whole entry once, then serve reads from the cache
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            // local buffer exhausted - refill it from the wrapped stream
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                buf.limit(0);
                throw new IOException("Truncated ZIP file");
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        // never hand out more bytes than the entry still holds
        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                        + " which is not supported by Commons"
                        + " Compress.");
            } else if (read == -1) {
                // underlying stream hit EOF before the deflate stream ended
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    // fill() set buf.limit to the number of bytes read
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
            // inflate may legitimately return 0 while it still wants more
            // input - keep feeding it in that case
        } while (read == 0 && inf.needsInput());
        return read;
    }

    /** Closes the wrapped stream and releases the inflater's native resources. */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                // skip by reading (and discarding) through the normal read
                // path so CRC/statistics stay consistent
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    /** Compares the leading bytes of signature against expected. */
    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                             ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state so the next entry starts clean
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                        + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Refills buf from the wrapped stream and hands the bytes to the
     * inflater.  Returns the number of bytes read, 0 or -1 at EOF.
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /** Reads b.length bytes or throws EOFException. */
    private void readFully(final byte[] b) throws IOException {
        readFully(b, 0);
    }

    /** Fills b starting at off or throws EOFException if the stream ends early. */
    private void readFully(final byte[] b, final int off) throws IOException {
        final int len = b.length - off;
        final int count = IOUtils.readFully(in, b, off, len);
        count(count);
        if (count < len) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor that follows the current entry's data
     * and copies CRC and sizes into the current entry.
     */
    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED or ENHANCED_DEFLATED.
     */
    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()

                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
                || entry.getMethod() == ZipEntry.DEFLATED
                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
    }

    /**
     * Whether the compressed size for the entry is either known or
     * not required by the compression method being used.
     */
    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
            || (entry.getGeneralPurposeBit().usesDataDescriptor()
                && allowStoredEntriesWithDataDescriptor
                && entry.getMethod() == ZipEntry.STORED);
    }

    // appended to exception messages when a stored entry with data
    // descriptor cannot be handled by this streaming implementation
    private static final String USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER =
        " while reading a stored entry using data descriptor. Either the archive is broken"
        + " or it can not be read using ZipArchiveInputStream and you must use ZipFile."
        + " A common cause for this is a ZIP archive containing a ZIP archive."
        + " See http://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile";

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     *   <li>Reads a stored entry until the signature of a local file
     *     header, central directory header or data descriptor has been
     *     found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
     *   <li>Rewinds the stream to position at the data
     *     descriptor.</li>
     *   <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }
        if (current.entry.getCompressedSize() != current.entry.getSize()) {
            // STORED means both sizes must be identical
            throw new ZipException("compressed and uncompressed size don't match"
                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
        }
        final byte[] b = bos.toByteArray();
        if (b.length != current.entry.getSize()) {
            throw new ZipException("actual and claimed size don't match"
                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
        }
        lastStoredEntry = new ByteArrayInputStream(b);
    }

    /** Raw signature bytes used when scanning for the end of a stored entry. */
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD =
ZipLong.DD_SIG.getBytes(); 932 933 /** 934 * Checks whether the current buffer contains the signature of a 935 * "data descriptor", "local file header" or 936 * "central directory entry". 937 * 938 * <p>If it contains such a signature, reads the data descriptor 939 * and positions the stream right after the data descriptor.</p> 940 */ 941 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 942 throws IOException { 943 944 boolean done = false; 945 for (int i = 0; !done && i < offset + lastRead - 4; i++) { 946 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 947 int expectDDPos = i; 948 if (i >= expectedDDLen && 949 (buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 950 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 951 // found a LFH or CFH: 952 expectDDPos = i - expectedDDLen; 953 done = true; 954 } 955 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 956 // found DD: 957 done = true; 958 } 959 if (done) { 960 // * push back bytes read in excess as well as the data 961 // descriptor 962 // * copy the remaining bytes to cache 963 // * read data descriptor 964 pushback(buf.array(), expectDDPos, offset + lastRead - expectDDPos); 965 bos.write(buf.array(), 0, expectDDPos); 966 readDataDescriptor(); 967 } 968 } 969 } 970 return done; 971 } 972 973 /** 974 * If the last read bytes could hold a data descriptor and an 975 * incomplete signature then save the last bytes to the front of 976 * the buffer and cache everything in front of the potential data 977 * descriptor into the given ByteArrayOutputStream. 
978 * 979 * <p>Data descriptor plus incomplete signature (3 bytes in the 980 * worst case) can be 20 bytes max.</p> 981 */ 982 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 983 final int cacheable = offset + lastRead - expecteDDLen - 3; 984 if (cacheable > 0) { 985 bos.write(buf.array(), 0, cacheable); 986 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 987 offset = expecteDDLen + 3; 988 } else { 989 offset += lastRead; 990 } 991 return offset; 992 } 993 994 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 995 ((PushbackInputStream) in).unread(buf, offset, length); 996 pushedBackBytes(length); 997 } 998 999 // End of Central Directory Record 1000 // end of central dir signature WORD 1001 // number of this disk SHORT 1002 // number of the disk with the 1003 // start of the central directory SHORT 1004 // total number of entries in the 1005 // central directory on this disk SHORT 1006 // total number of entries in 1007 // the central directory SHORT 1008 // size of the central directory WORD 1009 // offset of start of central 1010 // directory with respect to 1011 // the starting disk number WORD 1012 // .ZIP file comment length SHORT 1013 // .ZIP file comment up to 64KB 1014 // 1015 1016 /** 1017 * Reads the stream until it find the "End of central directory 1018 * record" and consumes it as well. 1019 */ 1020 private void skipRemainderOfArchive() throws IOException { 1021 // skip over central directory. One LFH has been read too much 1022 // already. The calculation discounts file names and extra 1023 // data so it will be too short. 
1024 realSkip((long) entriesRead * CFH_LEN - LFH_LEN); 1025 findEocdRecord(); 1026 realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 1027 readFully(shortBuf); 1028 // file comment 1029 realSkip(ZipShort.getValue(shortBuf)); 1030 } 1031 1032 /** 1033 * Reads forward until the signature of the "End of central 1034 * directory" record is found. 1035 */ 1036 private void findEocdRecord() throws IOException { 1037 int currentByte = -1; 1038 boolean skipReadCall = false; 1039 while (skipReadCall || (currentByte = readOneByte()) > -1) { 1040 skipReadCall = false; 1041 if (!isFirstByteOfEocdSig(currentByte)) { 1042 continue; 1043 } 1044 currentByte = readOneByte(); 1045 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 1046 if (currentByte == -1) { 1047 break; 1048 } 1049 skipReadCall = isFirstByteOfEocdSig(currentByte); 1050 continue; 1051 } 1052 currentByte = readOneByte(); 1053 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 1054 if (currentByte == -1) { 1055 break; 1056 } 1057 skipReadCall = isFirstByteOfEocdSig(currentByte); 1058 continue; 1059 } 1060 currentByte = readOneByte(); 1061 if (currentByte == -1 1062 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 1063 break; 1064 } 1065 skipReadCall = isFirstByteOfEocdSig(currentByte); 1066 } 1067 } 1068 1069 /** 1070 * Skips bytes by reading from the underlying stream rather than 1071 * the (potentially inflating) archive stream - which {@link 1072 * #skip} would do. 1073 * 1074 * Also updates bytes-read counter. 1075 */ 1076 private void realSkip(final long value) throws IOException { 1077 if (value >= 0) { 1078 long skipped = 0; 1079 while (skipped < value) { 1080 final long rem = value - skipped; 1081 final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? 
rem : skipBuf.length)); 1082 if (x == -1) { 1083 return; 1084 } 1085 count(x); 1086 skipped += x; 1087 } 1088 return; 1089 } 1090 throw new IllegalArgumentException(); 1091 } 1092 1093 /** 1094 * Reads bytes by reading from the underlying stream rather than 1095 * the (potentially inflating) archive stream - which {@link #read} would do. 1096 * 1097 * Also updates bytes-read counter. 1098 */ 1099 private int readOneByte() throws IOException { 1100 final int b = in.read(); 1101 if (b != -1) { 1102 count(1); 1103 } 1104 return b; 1105 } 1106 1107 private boolean isFirstByteOfEocdSig(final int b) { 1108 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 1109 } 1110 1111 private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] { 1112 'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2', 1113 }; 1114 private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE); 1115 1116 /** 1117 * Checks whether this might be an APK Signing Block. 1118 * 1119 * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It 1120 * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature 1121 * and if we've found it, return true.</p> 1122 * 1123 * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold 1124 * the local file header of the next entry. 
1125 * 1126 * @return true if this looks like a APK signing block 1127 * 1128 * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a> 1129 */ 1130 private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException { 1131 // length of block excluding the size field itself 1132 BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader); 1133 // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block, 1134 // also subtract 16 bytes in order to position us at the magic string 1135 BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length 1136 - (long) APK_SIGNING_BLOCK_MAGIC.length)); 1137 byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length]; 1138 1139 try { 1140 if (toSkip.signum() < 0) { 1141 // suspectLocalFileHeader contains the start of suspect magic string 1142 int off = suspectLocalFileHeader.length + toSkip.intValue(); 1143 // length was shorter than magic length 1144 if (off < DWORD) { 1145 return false; 1146 } 1147 int bytesInBuffer = Math.abs(toSkip.intValue()); 1148 System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length)); 1149 if (bytesInBuffer < magic.length) { 1150 readFully(magic, bytesInBuffer); 1151 } 1152 } else { 1153 while (toSkip.compareTo(LONG_MAX) > 0) { 1154 realSkip(Long.MAX_VALUE); 1155 toSkip = toSkip.add(LONG_MAX.negate()); 1156 } 1157 realSkip(toSkip.longValue()); 1158 readFully(magic); 1159 } 1160 } catch (EOFException ex) { //NOSONAR 1161 // length was invalid 1162 return false; 1163 } 1164 return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC); 1165 } 1166 1167 /** 1168 * Structure collecting information for the entry that is 1169 * currently being read. 1170 */ 1171 private static final class CurrentEntry { 1172 1173 /** 1174 * Current ZIP entry. 
1175 */ 1176 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 1177 1178 /** 1179 * Does the entry use a data descriptor? 1180 */ 1181 private boolean hasDataDescriptor; 1182 1183 /** 1184 * Does the entry have a ZIP64 extended information extra field. 1185 */ 1186 private boolean usesZip64; 1187 1188 /** 1189 * Number of bytes of entry content read by the client if the 1190 * entry is STORED. 1191 */ 1192 private long bytesRead; 1193 1194 /** 1195 * Number of bytes of entry content read from the stream. 1196 * 1197 * <p>This may be more than the actual entry's length as some 1198 * stuff gets buffered up and needs to be pushed back when the 1199 * end of the entry has been reached.</p> 1200 */ 1201 private long bytesReadFromStream; 1202 1203 /** 1204 * The checksum calculated as the current entry is read. 1205 */ 1206 private final CRC32 crc = new CRC32(); 1207 1208 /** 1209 * The input stream decompressing the data for shrunk and imploded entries. 1210 */ 1211 private InputStream in; 1212 } 1213 1214 /** 1215 * Bounded input stream adapted from commons-io 1216 */ 1217 private class BoundedInputStream extends InputStream { 1218 1219 /** the wrapped input stream */ 1220 private final InputStream in; 1221 1222 /** the max length to provide */ 1223 private final long max; 1224 1225 /** the number of bytes already returned */ 1226 private long pos = 0; 1227 1228 /** 1229 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1230 * stream and limits it to a certain size. 
1231 * 1232 * @param in The wrapped input stream 1233 * @param size The maximum number of bytes to return 1234 */ 1235 public BoundedInputStream(final InputStream in, final long size) { 1236 this.max = size; 1237 this.in = in; 1238 } 1239 1240 @Override 1241 public int read() throws IOException { 1242 if (max >= 0 && pos >= max) { 1243 return -1; 1244 } 1245 final int result = in.read(); 1246 pos++; 1247 count(1); 1248 current.bytesReadFromStream++; 1249 return result; 1250 } 1251 1252 @Override 1253 public int read(final byte[] b) throws IOException { 1254 return this.read(b, 0, b.length); 1255 } 1256 1257 @Override 1258 public int read(final byte[] b, final int off, final int len) throws IOException { 1259 if (max >= 0 && pos >= max) { 1260 return -1; 1261 } 1262 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1263 final int bytesRead = in.read(b, off, (int) maxRead); 1264 1265 if (bytesRead == -1) { 1266 return -1; 1267 } 1268 1269 pos += bytesRead; 1270 count(bytesRead); 1271 current.bytesReadFromStream += bytesRead; 1272 return bytesRead; 1273 } 1274 1275 @Override 1276 public long skip(final long n) throws IOException { 1277 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; 1278 final long skippedBytes = IOUtils.skip(in, toSkip); 1279 pos += skippedBytes; 1280 return skippedBytes; 1281 } 1282 1283 @Override 1284 public int available() throws IOException { 1285 if (max >= 0 && pos >= max) { 1286 return 0; 1287 } 1288 return in.available(); 1289 } 1290 } 1291}