source: trunk/third/bzip2/manual_2.html @ 17062

Revision 17062, 28.9 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r17061, which included commits to RCS files with non-trunk default branches.
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2<HTML>
3<!-- Created on January, 5  2002 by texi2html 1.64 -->
4<!--
5Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
6            Karl Berry  <karl@freefriends.org>
7            Olaf Bachmann <obachman@mathematik.uni-kl.de>
8            and many others.
9Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
10Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
11 
12-->
13<HEAD>
14<TITLE>Untitled Document: 2. How to use bzip2</TITLE>
15
16<META NAME="description" CONTENT="Untitled Document: 2. How to use bzip2">
17<META NAME="keywords" CONTENT="Untitled Document: 2. How to use bzip2">
18<META NAME="resource-type" CONTENT="document">
19<META NAME="distribution" CONTENT="global">
20<META NAME="Generator" CONTENT="texi2html 1.64">
21
22</HEAD>
23
24<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
25
26<A NAME="SEC2"></A>
27<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
28<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_1.html#SEC1"> &lt; </A>]</TD>
29<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC3"> &gt; </A>]</TD>
30<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
31<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
32<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
33<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
34<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
35<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
36<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
37</TR></TABLE>
38<H1> 2. How to use <CODE>bzip2</CODE> </H1>
39<!--docid::SEC2::-->
40<P>
41
42This chapter contains a copy of the <CODE>bzip2</CODE> man page,
43and nothing else.
44</P><P>
45
46<BLOCKQUOTE>
47<P>
48
49<HR SIZE="6">
50<A NAME="SEC3"></A>
51<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
52<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC2"> &lt; </A>]</TD>
53<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC4"> &gt; </A>]</TD>
54<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
55<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
56<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
57<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
58<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
59<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
60<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
61</TR></TABLE>
62<H4> NAME </H4>
63<!--docid::SEC3::-->
64<UL>
65<LI><CODE>bzip2</CODE>, <CODE>bunzip2</CODE>
66- a block-sorting file compressor, v1.0.2
67<LI><CODE>bzcat</CODE>
68- decompresses files to stdout
69<LI><CODE>bzip2recover</CODE>
70- recovers data from damaged bzip2 files
71</UL>
72<P>
73
74<HR SIZE="6">
75<A NAME="SEC4"></A>
76<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
77<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC3"> &lt; </A>]</TD>
78<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC5"> &gt; </A>]</TD>
79<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
80<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
81<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
82<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
83<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
84<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
85<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
86</TR></TABLE>
87<H4> SYNOPSIS </H4>
88<!--docid::SEC4::-->
89<UL>
90<LI><CODE>bzip2</CODE> [ -cdfkqstvzVL123456789 ] [ filenames ...  ]
91<LI><CODE>bunzip2</CODE> [ -fkvsVL ] [ filenames ...  ]
92<LI><CODE>bzcat</CODE> [ -s ] [ filenames ...  ]
93<LI><CODE>bzip2recover</CODE> filename
94</UL>
95<P>
96
97<HR SIZE="6">
98<A NAME="SEC5"></A>
99<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
100<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC4"> &lt; </A>]</TD>
101<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC6"> &gt; </A>]</TD>
102<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
103<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
104<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
105<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
106<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
107<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
108<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
109</TR></TABLE>
110<H4> DESCRIPTION </H4>
111<!--docid::SEC5::-->
112<P>
113
114<CODE>bzip2</CODE> compresses files using the Burrows-Wheeler block sorting
115text compression algorithm, and Huffman coding.  Compression is
116generally considerably better than that achieved by more conventional
117LZ77/LZ78-based compressors, and approaches the performance of the PPM
118family of statistical compressors.
119</P><P>
120
121The command-line options are deliberately very similar to those of GNU
122<CODE>gzip</CODE>, but they are not identical.
123</P><P>
124
125<CODE>bzip2</CODE> expects a list of file names to accompany the command-line
126flags.  Each file is replaced by a compressed version of itself, with
127the name <CODE>original_name.bz2</CODE>.  Each compressed file has the same
128modification date, permissions, and, when possible, ownership as the
129corresponding original, so that these properties can be correctly
130restored at decompression time.  File name handling is naive in the
131sense that there is no mechanism for preserving original file names,
132permissions, ownerships or dates in filesystems which lack these
133concepts, or have serious file name length restrictions, such as MS-DOS.
134</P><P>
135
136<CODE>bzip2</CODE> and <CODE>bunzip2</CODE> will by default not overwrite existing
137files.  If you want this to happen, specify the <CODE>-f</CODE> flag.
138</P><P>
139
140If no file names are specified, <CODE>bzip2</CODE> compresses from standard
141input to standard output.  In this case, <CODE>bzip2</CODE> will decline to
142write compressed output to a terminal, as this would be entirely
143incomprehensible and therefore pointless.
144</P><P>
145
146<CODE>bunzip2</CODE> (or <CODE>bzip2 -d</CODE>) decompresses all
147specified files.  Files which were not created by <CODE>bzip2</CODE>
148will be detected and ignored, and a warning issued. 
149<CODE>bzip2</CODE> attempts to guess the filename for the decompressed file
150from that of the compressed file as follows:
151<UL>
152<LI><CODE>filename.bz2 </CODE> becomes <CODE>filename</CODE>
153<LI><CODE>filename.bz </CODE> becomes <CODE>filename</CODE>
154<LI><CODE>filename.tbz2</CODE> becomes <CODE>filename.tar</CODE>
155<LI><CODE>filename.tbz </CODE> becomes <CODE>filename.tar</CODE>
156<LI><CODE>anyothername </CODE> becomes <CODE>anyothername.out</CODE>
157</UL>
158If the file does not end in one of the recognised endings,
159<CODE>.bz2</CODE>, <CODE>.bz</CODE>,
160<CODE>.tbz2</CODE> or <CODE>.tbz</CODE>, <CODE>bzip2</CODE> complains that it cannot
161guess the name of the original file, and uses the original name
162with <CODE>.out</CODE> appended.
163<P>
164
165As with compression, supplying no
166filenames causes decompression from standard input to standard output.
167</P><P>
168
169<CODE>bunzip2</CODE> will correctly decompress a file which is the
170concatenation of two or more compressed files.  The result is the
171concatenation of the corresponding uncompressed files.  Integrity
172testing (<CODE>-t</CODE>) of concatenated compressed files is also supported.
173</P><P>
174
175You can also compress or decompress files to the standard output by
176giving the <CODE>-c</CODE> flag.  Multiple files may be compressed and
177decompressed like this.  The resulting outputs are fed sequentially to
178stdout.  Compression of multiple files in this manner generates a stream
179containing multiple compressed file representations.  Such a stream
180can be decompressed correctly only by <CODE>bzip2</CODE> version 0.9.0 or
181later.  Earlier versions of <CODE>bzip2</CODE> will stop after decompressing
182the first file in the stream.
183</P><P>
184
185<CODE>bzcat</CODE> (or <CODE>bzip2 -dc</CODE>) decompresses all specified files to
186the standard output.
187</P><P>
188
189<CODE>bzip2</CODE> will read arguments from the environment variables
190<CODE>BZIP2</CODE> and <CODE>BZIP</CODE>, in that order, and will process them
191before any arguments read from the command line.  This gives a
192convenient way to supply default arguments.
193</P><P>
194
195Compression is always performed, even if the compressed file is slightly
196larger than the original.  Files of less than about one hundred bytes
197tend to get larger, since the compression mechanism has a constant
198overhead in the region of 50 bytes.  Random data (including the output
199of most file compressors) is coded at about 8.05 bits per byte, giving
200an expansion of around 0.5%.
201</P><P>
202
203As a self-check for your protection, <CODE>bzip2</CODE> uses 32-bit CRCs to
204make sure that the decompressed version of a file is identical to the
205original.  This guards against corruption of the compressed data, and
206against undetected bugs in <CODE>bzip2</CODE> (hopefully very unlikely).  The
207chance of data corruption going undetected is microscopic, about one
208chance in four billion for each file processed.  Be aware, though, that
209the check occurs upon decompression, so it can only tell you that
210something is wrong.  It can't help you recover the original uncompressed
211data.  You can use <CODE>bzip2recover</CODE> to try to recover data from
212damaged files.
213</P><P>
214
215Return values: 0 for a normal exit, 1 for environmental problems (file
216not found, invalid flags, I/O errors, &#38;c), 2 to indicate a corrupt
217compressed file, 3 for an internal consistency error (eg, bug) which
218caused <CODE>bzip2</CODE> to panic.
219</P><P>
220
221<HR SIZE="6">
222<A NAME="SEC6"></A>
223<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
224<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC5"> &lt; </A>]</TD>
225<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC7"> &gt; </A>]</TD>
226<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
227<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
228<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
229<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
230<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
231<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
232<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
233</TR></TABLE>
234<H4> OPTIONS </H4>
235<!--docid::SEC6::-->
236<DL COMPACT>
237<DT><CODE>-c  --stdout</CODE>
238<DD>Compress or decompress to standard output.
239<DT><CODE>-d  --decompress</CODE>
240<DD>Force decompression. <CODE>bzip2</CODE>, <CODE>bunzip2</CODE> and <CODE>bzcat</CODE> are
241really the same program, and the decision about what actions to take is
242done on the basis of which name is used.  This flag overrides that
243mechanism, and forces bzip2 to decompress.
244<DT><CODE>-z --compress</CODE>
245<DD>The complement to <CODE>-d</CODE>: forces compression, regardless of the
246invocation name.
247<DT><CODE>-t --test</CODE>
248<DD>Check integrity of the specified file(s), but don't decompress them.
249This really performs a trial decompression and throws away the result.
250<DT><CODE>-f --force</CODE>
251<DD>Force overwrite of output files. Normally, <CODE>bzip2</CODE> will not overwrite
252existing output files.  Also forces <CODE>bzip2</CODE> to break hard links
253to files, which it otherwise wouldn't do.
254<P>
255
256<CODE>bzip2</CODE> normally declines to decompress files which don't have the
257correct magic header bytes.  If forced (<CODE>-f</CODE>), however, it will
258pass such files through unmodified.  This is how GNU <CODE>gzip</CODE>
259behaves.
260<DT><CODE>-k --keep</CODE>
261<DD>Keep (don't delete) input files during compression
262or decompression.
263<DT><CODE>-s --small</CODE>
264<DD>Reduce memory usage, for compression, decompression and testing.  Files
265are decompressed and tested using a modified algorithm which only
266requires 2.5 bytes per block byte.  This means any file can be
267decompressed in 2300k of memory, albeit at about half the normal speed.
268<P>
269
270During compression, <CODE>-s</CODE> selects a block size of 200k, which limits
271memory use to around the same figure, at the expense of your compression
272ratio.  In short, if your machine is low on memory (8 megabytes or
273less), use -s for everything.  See MEMORY MANAGEMENT below.
274<DT><CODE>-q --quiet</CODE>
275<DD>Suppress non-essential warning messages.  Messages pertaining to
276I/O errors and other critical events will not be suppressed.
277<DT><CODE>-v --verbose</CODE>
278<DD>Verbose mode -- show the compression ratio for each file processed.
279Further <CODE>-v</CODE>'s increase the verbosity level, spewing out lots of
280information which is primarily of interest for diagnostic purposes.
281<DT><CODE>-L --license -V --version</CODE>
282<DD>Display the software version, license terms and conditions.
283<DT><CODE>-1 (or --fast) to -9 (or --best)</CODE>
284<DD>Set the block size to 100 k, 200 k ..  900 k when compressing.  Has no
285effect when decompressing.  See MEMORY MANAGEMENT below.
286The <CODE>--fast</CODE> and <CODE>--best</CODE> aliases are primarily for GNU
287<CODE>gzip</CODE> compatibility.  In particular, <CODE>--fast</CODE> doesn't make
288things significantly faster.  And <CODE>--best</CODE> merely selects the
289default behaviour.
290<DT><CODE>--</CODE>
291<DD>Treats all subsequent arguments as file names, even if they start
292with a dash.  This is so you can handle files with names beginning
293with a dash, for example: <CODE>bzip2 -- -myfilename</CODE>.
294<DT><CODE>--repetitive-fast</CODE>
295<DT><CODE>--repetitive-best</CODE>
296<DD>These flags are redundant in versions 0.9.5 and above.  They provided
297some coarse control over the behaviour of the sorting algorithm in
298earlier versions, which was sometimes useful.  0.9.5 and above have an
299improved algorithm which renders these flags irrelevant.
300</DL>
301<P>
302
303<HR SIZE="6">
304<A NAME="SEC7"></A>
305<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
306<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC6"> &lt; </A>]</TD>
307<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC8"> &gt; </A>]</TD>
308<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
309<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
310<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
311<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
312<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
313<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
314<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
315</TR></TABLE>
316<H4> MEMORY MANAGEMENT </H4>
317<!--docid::SEC7::-->
318<P>
319
320<CODE>bzip2</CODE> compresses large files in blocks.  The block size affects
321both the compression ratio achieved, and the amount of memory needed for
322compression and decompression.  The flags <CODE>-1</CODE> through <CODE>-9</CODE>
323specify the block size to be 100,000 bytes through 900,000 bytes (the
324default) respectively.  At decompression time, the block size used for
325compression is read from the header of the compressed file, and
326<CODE>bunzip2</CODE> then allocates itself just enough memory to decompress
327the file.  Since block sizes are stored in compressed files, it follows
328that the flags <CODE>-1</CODE> to <CODE>-9</CODE> are irrelevant to and so ignored
329during decompression.
330</P><P>
331
332Compression and decompression requirements, in bytes, can be estimated
333as:
334<TABLE><tr><td>&nbsp;</td><td class=example><pre>     Compression:   400k + ( 8 x block size )
335
336     Decompression: 100k + ( 4 x block size ), or
337                    100k + ( 2.5 x block size )
338</pre></td></tr></table>Larger block sizes give rapidly diminishing marginal returns.  Most of
339the compression comes from the first two or three hundred k of block
340size, a fact worth bearing in mind when using <CODE>bzip2</CODE> on small machines.
341It is also important to appreciate that the decompression memory
342requirement is set at compression time by the choice of block size.
343</P><P>
344
345For files compressed with the default 900k block size, <CODE>bunzip2</CODE>
346will require about 3700 kbytes to decompress.  To support decompression
347of any file on a 4 megabyte machine, <CODE>bunzip2</CODE> has an option to
348decompress using approximately half this amount of memory, about 2300
349kbytes.  Decompression speed is also halved, so you should use this
350option only where necessary.  The relevant flag is <CODE>-s</CODE>.
351</P><P>
352
353In general, try and use the largest block size memory constraints allow,
354since that maximises the compression achieved.  Compression and
355decompression speed are virtually unaffected by block size.
356</P><P>
357
358Another significant point applies to files which fit in a single block
359-- that means most files you'd encounter using a large block size.  The
360amount of real memory touched is proportional to the size of the file,
361since the file is smaller than a block.  For example, compressing a file
36220,000 bytes long with the flag <CODE>-9</CODE> will cause the compressor to
363allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
364kbytes of it.  Similarly, the decompressor will allocate 3700k but only
365touch 100k + 20000 * 4 = 180 kbytes.
366</P><P>
367
368Here is a table which summarises the maximum memory usage for different
369block sizes.  Also recorded is the total compressed size for 14 files of
370the Calgary Text Compression Corpus totalling 3,141,622 bytes.  This
371column gives some feel for how compression varies with block size.
372These figures tend to understate the advantage of larger block sizes for
373larger files, since the Corpus is dominated by smaller files.
374<TABLE><tr><td>&nbsp;</td><td class=example><pre>          Compress   Decompress   Decompress   Corpus
375   Flag     usage      usage       -s usage     Size
376
377    -1      1200k       500k         350k      914704
378    -2      2000k       900k         600k      877703
379    -3      2800k      1300k         850k      860338
380    -4      3600k      1700k        1100k      846899
381    -5      4400k      2100k        1350k      845160
382    -6      5200k      2500k        1600k      838626
383    -7      6100k      2900k        1850k      834096
384    -8      6800k      3300k        2100k      828642
385    -9      7600k      3700k        2350k      828642
386</pre></td></tr></table></P><P>
387
388<HR SIZE="6">
389<A NAME="SEC8"></A>
390<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
391<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC7"> &lt; </A>]</TD>
392<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC9"> &gt; </A>]</TD>
393<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
394<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
395<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
396<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
397<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
398<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
399<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
400</TR></TABLE>
401<H4> RECOVERING DATA FROM DAMAGED FILES </H4>
402<!--docid::SEC8::-->
403<P>
404
405<CODE>bzip2</CODE> compresses files in blocks, usually 900kbytes long.  Each
406block is handled independently.  If a media or transmission error causes
407a multi-block <CODE>.bz2</CODE> file to become damaged, it may be possible to
408recover data from the undamaged blocks in the file.
409</P><P>
410
411The compressed representation of each block is delimited by a 48-bit
412pattern, which makes it possible to find the block boundaries with
413reasonable certainty.  Each block also carries its own 32-bit CRC, so
414damaged blocks can be distinguished from undamaged ones.
415</P><P>
416
417<CODE>bzip2recover</CODE> is a simple program whose purpose is to search for
418blocks in <CODE>.bz2</CODE> files, and write each block out into its own
419<CODE>.bz2</CODE> file.  You can then use <CODE>bzip2 -t</CODE> to test the
420integrity of the resulting files, and decompress those which are
421undamaged.
422</P><P>
423
424<CODE>bzip2recover</CODE> 
425takes a single argument, the name of the damaged file, and writes a
426number of files <CODE>rec00001file.bz2</CODE>, <CODE>rec00002file.bz2</CODE>, etc,
427containing the extracted blocks.  The output filenames are designed so
428that the use of wildcards in subsequent processing -- for example,
429<CODE>bzip2 -dc rec*file.bz2 &#62; recovered_data</CODE> -- processes the files in
430the correct order.
431</P><P>
432
433<CODE>bzip2recover</CODE> should be of most use dealing with large <CODE>.bz2</CODE>
434files, as these will contain many blocks.  It is clearly futile to use
435it on damaged single-block files, since a damaged block cannot be
436recovered.  If you wish to minimise any potential data loss through
437media or transmission errors, you might consider compressing with a
438smaller block size.
439</P><P>
440
441<HR SIZE="6">
442<A NAME="SEC9"></A>
443<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
444<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC8"> &lt; </A>]</TD>
445<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC10"> &gt; </A>]</TD>
446<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
447<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
448<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
449<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
450<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
451<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
452<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
453</TR></TABLE>
454<H4> PERFORMANCE NOTES </H4>
455<!--docid::SEC9::-->
456<P>
457
458The sorting phase of compression gathers together similar strings in the
459file.  Because of this, files containing very long runs of repeated
460symbols, like "aabaabaabaab ..."  (repeated several hundred times) may
461compress more slowly than normal.  Versions 0.9.5 and above fare much
462better than previous versions in this respect.  The ratio between
463worst-case and average-case compression time is in the region of 10:1.
464For previous versions, this figure was more like 100:1.  You can use the
465<CODE>-vvvv</CODE> option to monitor progress in great detail, if you want.
466</P><P>
467
468Decompression speed is unaffected by these phenomena.
469</P><P>
470
471<CODE>bzip2</CODE> usually allocates several megabytes of memory to operate
472in, and then charges all over it in a fairly random fashion.  This means
473that performance, both for compressing and decompressing, is largely
474determined by the speed at which your machine can service cache misses.
475Because of this, small changes to the code to reduce the miss rate have
476been observed to give disproportionately large performance improvements.
477I imagine <CODE>bzip2</CODE> will perform best on machines with very large
478caches.
479</P><P>
480
481<HR SIZE="6">
482<A NAME="SEC10"></A>
483<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
484<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC9"> &lt; </A>]</TD>
485<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC11"> &gt; </A>]</TD>
486<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
487<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
488<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
489<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
490<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
491<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
492<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
493</TR></TABLE>
494<H4> CAVEATS </H4>
495<!--docid::SEC10::-->
496<P>
497
498I/O error messages are not as helpful as they could be.  <CODE>bzip2</CODE>
499tries hard to detect I/O errors and exit cleanly, but the details of
500what the problem is sometimes seem rather misleading.
501</P><P>
502
503This manual page pertains to version 1.0.2 of <CODE>bzip2</CODE>.  Compressed
504data created by this version is entirely forwards and backwards
505compatible with the previous public releases, versions 0.1pl2, 0.9.0,
5060.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and
507above can correctly decompress multiple concatenated compressed files.
5080.1pl2 cannot do this; it will stop after decompressing just the first
509file in the stream.
510</P><P>
511
512<CODE>bzip2recover</CODE> versions prior to this one, 1.0.2, used 32-bit
513integers to represent bit positions in compressed files, so they could not
514handle compressed files more than 512 megabytes long.  Versions 1.0.2 and
515above use 64-bit ints on some platforms which support them (GNU
516supported targets, and Windows).  To establish whether or not
517<CODE>bzip2recover</CODE> was built with such a limitation, run it without
518arguments.  In any event you can build yourself an unlimited version if
519you can recompile it with <CODE>MaybeUInt64</CODE> set to be an unsigned
52064-bit integer.
521</P><P>
522
523<HR SIZE="6">
524<A NAME="SEC11"></A>
525<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
526<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC10"> &lt; </A>]</TD>
527<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_3.html#SEC12"> &gt; </A>]</TD>
528<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
529<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
530<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
531<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
532<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
533<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
534<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
535</TR></TABLE>
536<H4> AUTHOR </H4>
537<!--docid::SEC11::-->
538Julian Seward, <CODE>jseward@acm.org</CODE>.
539<P>
540
541<CODE>http://sources.redhat.com/bzip2</CODE>
542</P><P>
543
544The ideas embodied in <CODE>bzip2</CODE> are due to (at least) the following
545people: Michael Burrows and David Wheeler (for the block sorting
546transformation), David Wheeler (again, for the Huffman coder), Peter
547Fenwick (for the structured coding model in the original <CODE>bzip</CODE>,
548and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
549(for the arithmetic coder in the original <CODE>bzip</CODE>).  I am much
550indebted for their help, support and advice.  See the manual in the
551source distribution for pointers to sources of documentation.  Christian
552von Roques encouraged me to look for faster sorting algorithms, so as to
553speed up compression.  Bela Lubkin encouraged me to improve the
554worst-case compression performance.  The <CODE>bz*</CODE> scripts are derived
555from those of GNU <CODE>gzip</CODE>.  Many people sent patches, helped with
556portability problems, lent machines, gave advice and were generally
557helpful.
558</P><P>
559
560</BLOCKQUOTE>
561
562<HR SIZE="6">
563<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
564<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
565<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
566<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
567<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
568<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
569<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
570</TR></TABLE>
571<BR> 
572<FONT SIZE="-1">
573This document was generated
574by <I>Julian Seward</I> on <I>January, 5  2002</I>
575using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
576</FONT>
577
578</BODY>
579</HTML>
Note: See TracBrowser for help on using the repository browser.