| 1 | /******************************************************************************* |
| 2 | * Copyright (c) 2008, 2009 IBM Corporation and others. |
| 3 | * All rights reserved. This program and the accompanying materials |
| 4 | * are made available under the terms of the Eclipse Public License v1.0 |
| 5 | * which accompanies this distribution, and is available at |
| 6 | * http://www.eclipse.org/legal/epl-v10.html |
| 7 | * |
| 8 | * Contributors: |
| 9 | * IBM Corporation - initial API and implementation |
| 10 | * Remy Chi Jian Suen <remy.suen@gmail.com> - Bug 243347 TarFile should not throw NPE in finalize() |
| 11 | *******************************************************************************/ |
| 12 | package org.eclipse.pde.api.tools.internal.util; |
| 13 | |
| 14 | import java.io.File; |
| 15 | import java.io.FileInputStream; |
| 16 | import java.io.FilterInputStream; |
| 17 | import java.io.IOException; |
| 18 | import java.io.InputStream; |
| 19 | import java.util.Enumeration; |
| 20 | import java.util.zip.GZIPInputStream; |
| 21 | /** |
| 22 | * Reads a .tar or .tar.gz archive file, providing an index enumeration |
| 23 | * and allows for accessing an InputStream for arbitrary files in the |
| 24 | * archive. |
| 25 | */ |
| 26 | public class TarFile { |
| 27 | private static class TarInputStream extends FilterInputStream { |
| 28 | private int nextEntry = 0; |
| 29 | private int nextEOF = 0; |
| 30 | private int filepos = 0; |
| 31 | private int bytesread = 0; |
| 32 | private TarEntry firstEntry = null; |
| 33 | private String longLinkName = null; |
| 34 | |
| 35 | /** |
| 36 | * Creates a new tar input stream on the given input stream. |
| 37 | * |
| 38 | * @param in input stream |
| 39 | * @throws TarException |
| 40 | * @throws IOException |
| 41 | */ |
| 42 | public TarInputStream(InputStream in) throws TarException, IOException { |
| 43 | super(in); |
| 44 | |
| 45 | // Read in the first TarEntry to make sure |
| 46 | // the input is a valid tar file stream. |
| 47 | firstEntry = getNextEntry(); |
| 48 | } |
| 49 | |
| 50 | /** |
| 51 | * Create a new tar input stream, skipping ahead to the given entry |
| 52 | * in the file. |
| 53 | * |
| 54 | * @param in input stream |
| 55 | * @param entry skips to this entry in the file |
| 56 | * @throws TarException |
| 57 | * @throws IOException |
| 58 | */ |
| 59 | TarInputStream(InputStream in, TarEntry entry) throws TarException, IOException { |
| 60 | super(in); |
| 61 | skipToEntry(entry); |
| 62 | } |
| 63 | |
| 64 | /** |
| 65 | * The checksum of a tar file header is simply the sum of the bytes in |
| 66 | * the header. |
| 67 | * |
| 68 | * @param header |
| 69 | * @return checksum |
| 70 | */ |
| 71 | private long headerChecksum(byte[] header) { |
| 72 | long sum = 0; |
| 73 | for(int i = 0; i < 512; i++) { |
| 74 | sum += header[i] & 0xff; |
| 75 | } |
| 76 | return sum; |
| 77 | } |
| 78 | |
| 79 | /** |
| 80 | * Skips ahead to the position of the given entry in the file. |
| 81 | * |
| 82 | * @param entry |
| 83 | * @returns false if the entry has already been passed |
| 84 | * @throws TarException |
| 85 | * @throws IOException |
| 86 | */ |
| 87 | boolean skipToEntry(TarEntry entry) throws TarException, IOException { |
| 88 | int bytestoskip = entry.filepos - bytesread; |
| 89 | if(bytestoskip < 0) { |
| 90 | return false; |
| 91 | } |
| 92 | while(bytestoskip > 0) { |
| 93 | long ret = in.skip(bytestoskip); |
| 94 | if(ret < 0) { |
| 95 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
| 96 | } |
| 97 | bytestoskip -= ret; |
| 98 | bytesread += ret; |
| 99 | } |
| 100 | filepos = entry.filepos; |
| 101 | nextEntry = 0; |
| 102 | nextEOF = 0; |
| 103 | // Read next header to seek to file data. |
| 104 | getNextEntry(); |
| 105 | return true; |
| 106 | } |
| 107 | |
| 108 | /** |
| 109 | * Returns true if the header checksum is correct. |
| 110 | * |
| 111 | * @param header |
| 112 | * @return true if this header has a valid checksum |
| 113 | */ |
| 114 | private boolean isValidTarHeader(byte[] header) { |
| 115 | long fileChecksum, calculatedChecksum; |
| 116 | int pos, i; |
| 117 | |
| 118 | pos = 148; |
| 119 | StringBuffer checksumString = new StringBuffer(); |
| 120 | for(i = 0; i < 8; i++) { |
| 121 | if(header[pos + i] == ' ') { |
| 122 | continue; |
| 123 | } |
| 124 | if(header[pos + i] == 0 || !Character.isDigit((char) header[pos + i])) { |
| 125 | break; |
| 126 | } |
| 127 | checksumString.append((char) header[pos + i]); |
| 128 | } |
| 129 | if(checksumString.length() == 0) { |
| 130 | return false; |
| 131 | } |
| 132 | if(checksumString.charAt(0) != '0') { |
| 133 | checksumString.insert(0, '0'); |
| 134 | } |
| 135 | try { |
| 136 | fileChecksum = Long.decode(checksumString.toString()).longValue(); |
| 137 | } catch(NumberFormatException exception) { |
| 138 | //This is not valid if it cannot be parsed |
| 139 | return false; |
| 140 | } |
| 141 | |
| 142 | // Blank out the checksum. |
| 143 | for(i = 0; i < 8; i++) { |
| 144 | header[pos + i] = ' '; |
| 145 | } |
| 146 | calculatedChecksum = headerChecksum(header); |
| 147 | |
| 148 | return (fileChecksum == calculatedChecksum); |
| 149 | } |
| 150 | |
| 151 | /** |
| 152 | * Returns the next entry in the tar file. Does not handle |
| 153 | * GNU @LongLink extensions. |
| 154 | * |
| 155 | * @return the next entry in the tar file |
| 156 | * @throws TarException |
| 157 | * @throws IOException |
| 158 | */ |
| 159 | TarEntry getNextEntryInternal() throws TarException, IOException { |
| 160 | byte[] header = new byte[512]; |
| 161 | int pos = 0; |
| 162 | int i; |
| 163 | |
| 164 | if(firstEntry != null) { |
| 165 | TarEntry entryReturn = firstEntry; |
| 166 | firstEntry = null; |
| 167 | return entryReturn; |
| 168 | } |
| 169 | |
| 170 | while(nextEntry > 0) { |
| 171 | long ret = in.skip(nextEntry); |
| 172 | if(ret < 0) { |
| 173 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
| 174 | } |
| 175 | nextEntry -= ret; |
| 176 | bytesread += ret; |
| 177 | } |
| 178 | |
| 179 | int bytestoread = 512; |
| 180 | while(bytestoread > 0) { |
| 181 | int ret = super.read(header, 512 - bytestoread, bytestoread); |
| 182 | if( ret < 0 ) { |
| 183 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
| 184 | } |
| 185 | bytestoread -= ret; |
| 186 | bytesread += ret; |
| 187 | } |
| 188 | |
| 189 | // If we have a header of all zeros, this marks the end of the file. |
| 190 | if(headerChecksum(header) == 0) { |
| 191 | // We are at the end of the file. |
| 192 | if(filepos > 0) { |
| 193 | return null; |
| 194 | } |
| 195 | |
| 196 | // Invalid stream. |
| 197 | throw new TarException("not in tar format"); //$NON-NLS-1$ |
| 198 | } |
| 199 | |
| 200 | // Validate checksum. |
| 201 | if(!isValidTarHeader(header)) { |
| 202 | throw new TarException("not in tar format"); //$NON-NLS-1$ |
| 203 | } |
| 204 | |
| 205 | while (pos < 100 && header[pos] != 0) { |
| 206 | pos++; |
| 207 | } |
| 208 | String name = new String(header, 0, pos, "UTF8"); //$NON-NLS-1$ |
| 209 | // Prepend the prefix here. |
| 210 | pos = 345; |
| 211 | if(header[pos] != 0) { |
| 212 | while (pos < 500 && header[pos] != 0) { |
| 213 | pos++; |
| 214 | } |
| 215 | String prefix = new String(header, 345, pos - 345, "UTF8"); //$NON-NLS-1$ |
| 216 | name = prefix + "/" + name; //$NON-NLS-1$ |
| 217 | } |
| 218 | |
| 219 | TarEntry entry; |
| 220 | if(longLinkName != null) { |
| 221 | entry = new TarEntry(longLinkName, filepos); |
| 222 | longLinkName = null; |
| 223 | } else { |
| 224 | entry = new TarEntry(name, filepos); |
| 225 | } |
| 226 | if(header[156] != 0) { |
| 227 | entry.setFileType(header[156]); |
| 228 | } |
| 229 | |
| 230 | pos = 100; |
| 231 | StringBuffer mode = new StringBuffer(); |
| 232 | for(i = 0; i < 8; i++) { |
| 233 | if(header[pos + i] == 0) { |
| 234 | break; |
| 235 | } |
| 236 | if(header[pos + i] == ' ') { |
| 237 | continue; |
| 238 | } |
| 239 | mode.append((char) header[pos + i]); |
| 240 | } |
| 241 | if(mode.length() > 0 && mode.charAt(0) != '0') { |
| 242 | mode.insert(0, '0'); |
| 243 | } |
| 244 | try { |
| 245 | long fileMode = Long.decode(mode.toString()).longValue(); |
| 246 | entry.setMode(fileMode); |
| 247 | } catch(NumberFormatException nfe) { |
| 248 | throw new TarException("Not a valid tar format", nfe); //$NON-NLS-1$ |
| 249 | } |
| 250 | |
| 251 | pos = 100 + 24; |
| 252 | StringBuffer size = new StringBuffer(); |
| 253 | for(i = 0; i < 12; i++) { |
| 254 | if(header[pos + i] == 0) { |
| 255 | break; |
| 256 | } |
| 257 | if(header[pos + i] == ' ') { |
| 258 | continue; |
| 259 | } |
| 260 | size.append((char) header[pos + i]); |
| 261 | } |
| 262 | if(size.charAt(0) != '0') { |
| 263 | size.insert(0, '0'); |
| 264 | } |
| 265 | int fileSize; |
| 266 | try { |
| 267 | fileSize = Integer.decode(size.toString()).intValue(); |
| 268 | } catch(NumberFormatException nfe) { |
| 269 | throw new TarException("Not a valid tar format", nfe); //$NON-NLS-1$ |
| 270 | } |
| 271 | |
| 272 | entry.setSize(fileSize); |
| 273 | nextEOF = fileSize; |
| 274 | if(fileSize % 512 > 0) { |
| 275 | nextEntry = fileSize + (512 - (fileSize % 512)); |
| 276 | } else { |
| 277 | nextEntry = fileSize; |
| 278 | } |
| 279 | filepos += (nextEntry + 512); |
| 280 | return entry; |
| 281 | } |
| 282 | |
| 283 | /** |
| 284 | * Moves ahead to the next file in the tar archive and returns |
| 285 | * a TarEntry object describing it. |
| 286 | * |
| 287 | * @return the next entry in the tar file |
| 288 | * @throws TarException |
| 289 | * @throws IOException |
| 290 | */ |
| 291 | public TarEntry getNextEntry() throws TarException, IOException { |
| 292 | TarEntry entry = getNextEntryInternal(); |
| 293 | |
| 294 | if(entry != null && entry.getName().equals("././@LongLink")) { //$NON-NLS-1$ |
| 295 | // This is a GNU extension for doing long filenames. |
| 296 | // We get a file called ././@LongLink which just contains |
| 297 | // the real pathname. |
| 298 | byte[] longNameData = new byte[(int) entry.getSize()]; |
| 299 | int bytesread = 0; |
| 300 | while (bytesread < longNameData.length) { |
| 301 | int cur = read(longNameData, bytesread, longNameData.length - bytesread); |
| 302 | if (cur < 0) { |
| 303 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
| 304 | } |
| 305 | bytesread += cur; |
| 306 | } |
| 307 | |
| 308 | int pos = 0; |
| 309 | while (pos < longNameData.length && longNameData[pos] != 0) { |
| 310 | pos++; |
| 311 | } |
| 312 | longLinkName = new String(longNameData, 0, pos, "UTF8"); //$NON-NLS-1$ |
| 313 | return getNextEntryInternal(); |
| 314 | } |
| 315 | return entry; |
| 316 | } |
| 317 | |
| 318 | /* (non-Javadoc) |
| 319 | * @see java.io.FilterInputStream#read(byte[], int, int) |
| 320 | */ |
| 321 | public int read(byte[] b, int off, int len) throws IOException { |
| 322 | if(nextEOF == 0) { |
| 323 | return -1; |
| 324 | } |
| 325 | int size = super.read(b, off, (len > nextEOF ? nextEOF : len)); |
| 326 | nextEntry -= size; |
| 327 | nextEOF -= size; |
| 328 | bytesread += size; |
| 329 | return size; |
| 330 | } |
| 331 | |
| 332 | /* (non-Javadoc) |
| 333 | * @see java.io.FilterInputStream#read() |
| 334 | */ |
| 335 | public int read() throws IOException { |
| 336 | byte[] data = new byte[1]; |
| 337 | int size = read(data, 0, 1); |
| 338 | if (size < 0) { |
| 339 | return size; |
| 340 | } |
| 341 | return data[0]; |
| 342 | } |
| 343 | } |
| 344 | private File file; |
| 345 | TarInputStream entryEnumerationStream; |
| 346 | TarEntry curEntry; |
| 347 | private TarInputStream entryStream; |
| 348 | |
| 349 | private InputStream internalEntryStream; |
| 350 | |
| 351 | /** |
| 352 | * Create a new TarFile for the given file. |
| 353 | * |
| 354 | * @param file |
| 355 | * @throws TarException |
| 356 | * @throws IOException |
| 357 | */ |
| 358 | public TarFile(File file) throws TarException, IOException { |
| 359 | this.file = file; |
| 360 | |
| 361 | InputStream in = new FileInputStream(file); |
| 362 | // First, check if it's a GZIPInputStream. |
| 363 | try { |
| 364 | in = new GZIPInputStream(in); |
| 365 | } catch(IOException e) { |
| 366 | //If it is not compressed we close |
| 367 | //the old one and recreate |
| 368 | in.close(); |
| 369 | in = new FileInputStream(file); |
| 370 | } |
| 371 | try { |
| 372 | entryEnumerationStream = new TarInputStream(in); |
| 373 | } catch (TarException ex) { |
| 374 | in.close(); |
| 375 | throw ex; |
| 376 | } |
| 377 | curEntry = entryEnumerationStream.getNextEntry(); |
| 378 | } |
| 379 | |
| 380 | /** |
| 381 | * Close the tar file input stream. |
| 382 | * |
| 383 | * @throws IOException if the file cannot be successfully closed |
| 384 | */ |
| 385 | public void close() throws IOException { |
| 386 | if (entryEnumerationStream != null) |
| 387 | entryEnumerationStream.close(); |
| 388 | if (internalEntryStream != null) |
| 389 | internalEntryStream.close(); |
| 390 | } |
| 391 | |
| 392 | /** |
| 393 | * Create a new TarFile for the given path name. |
| 394 | * |
| 395 | * @param filename |
| 396 | * @throws TarException |
| 397 | * @throws IOException |
| 398 | */ |
| 399 | public TarFile(String filename) throws TarException, IOException { |
| 400 | this(new File(filename)); |
| 401 | } |
| 402 | |
| 403 | /** |
| 404 | * Returns an enumeration cataloguing the tar archive. |
| 405 | * |
| 406 | * @return enumeration of all files in the archive |
| 407 | */ |
| 408 | public Enumeration entries() { |
| 409 | return new Enumeration() { |
| 410 | public boolean hasMoreElements() { |
| 411 | return (curEntry != null); |
| 412 | } |
| 413 | |
| 414 | public Object nextElement() { |
| 415 | TarEntry oldEntry = curEntry; |
| 416 | try { |
| 417 | curEntry = entryEnumerationStream.getNextEntry(); |
| 418 | } catch(TarException e) { |
| 419 | curEntry = null; |
| 420 | } catch(IOException e) { |
| 421 | curEntry = null; |
| 422 | } |
| 423 | return oldEntry; |
| 424 | } |
| 425 | }; |
| 426 | } |
| 427 | |
| 428 | /** |
| 429 | * Returns a new InputStream for the given file in the tar archive. |
| 430 | * |
| 431 | * @param entry |
| 432 | * @return an input stream for the given file |
| 433 | * @throws TarException |
| 434 | * @throws IOException |
| 435 | */ |
| 436 | public InputStream getInputStream(TarEntry entry) throws TarException, IOException { |
| 437 | if(entryStream == null || !entryStream.skipToEntry(entry)) { |
| 438 | if (internalEntryStream != null) { |
| 439 | internalEntryStream.close(); |
| 440 | } |
| 441 | internalEntryStream = new FileInputStream(file); |
| 442 | // First, check if it's a GZIPInputStream. |
| 443 | try { |
| 444 | internalEntryStream = new GZIPInputStream(internalEntryStream); |
| 445 | } catch(IOException e) { |
| 446 | //If it is not compressed we close |
| 447 | //the old one and recreate |
| 448 | internalEntryStream.close(); |
| 449 | internalEntryStream = new FileInputStream(file); |
| 450 | } |
| 451 | entryStream = new TarInputStream(internalEntryStream, entry) { |
| 452 | public void close() { |
| 453 | // Ignore close() since we want to reuse the stream. |
| 454 | } |
| 455 | }; |
| 456 | } |
| 457 | return entryStream; |
| 458 | } |
| 459 | |
| 460 | /** |
| 461 | * Returns the path name of the file this archive represents. |
| 462 | * |
| 463 | * @return path |
| 464 | */ |
| 465 | public String getName() { |
| 466 | return file.getPath(); |
| 467 | } |
| 468 | |
| 469 | /* (non-Javadoc) |
| 470 | * @see java.util.zip.ZipFile#finalize() |
| 471 | * |
| 472 | */ |
| 473 | protected void finalize() throws Throwable { |
| 474 | close(); |
| 475 | } |
| 476 | } |