Originally published in Rebol Forces.

Reb-IT!
Author: Ole Friis (ole_f@post3.tele.dk)
Date: Jul, 2001
Part: 5 of 6

Contents

The main program

I decided to put all the functions into an object, and then have the COMPRESS and DECOMPRESS functions inside this object do all the work. Here's how it looks - hopefully you'll recognize most of the functions!

compress-bw: context [
; Burrows-Wheeler encoding
 
encode-bw: func [
"Encodes a string using the Burrows-Wheeler transform"
str [string!] "The string to encode"
/local
str-length permutations
transformed-string transformed-info i
][
str-length: length? str
 
; Build a representation of the permutations
permutations: make block! str-length
for i 1 str-length 1 [append permutations i]
 
; Sort the permutations in lexicographic order
sort/compare permutations func [n1 n2 /local c1 c2] [
loop str-length [
if n1 < 1 [n1: str-length]
if n2 < 1 [n2: str-length]
c1: pick str either n1 = 1 [1][str-length - n1 + 2]
c2: pick str either n2 = 1 [1][str-length - n2 + 2]
if c1 < c2 [return true]
if c1 > c2 [return false]
n1: n1 - 1
n2: n2 - 1
]
return true
]
 
; Now, let's create the new string
transformed-string: make string! str-length
transformed-info: (index? find permutations 1)
 
foreach permutation permutations [
append transformed-string pick str (str-length - permutation + 1)
]
reduce [transformed-string transformed-info]
]
 
decode-bw: func [
"Decodes a string using the Burrows-Wheeler transform"
str [string!] "The encoded version of the string"
index [integer!] "The index to the original text"
/local
str-length first-row found-table t letter-index
old-offset new-offset decoded-string
][
str-length: length? str
 
; Find t, which is the relation between str and the permutations
first-row: sort/case copy str
insert/dup (found-table: make block! 256) 0 256
t: make block! str-length
foreach letter str [
letter-index: (to-integer letter) + 1
old-offset: pick found-table letter-index
new-offset: index? find/case (skip first-row old-offset) letter
append t new-offset
poke found-table letter-index new-offset
]
 
; Now the rest is easy
decoded-string: make string! str-length
insert decoded-string pick str index
loop str-length - 1 [
insert decoded-string pick str (index: pick t index)
]
decoded-string
]
 
 
; Move-to-Front encoding
 
encode-mtf: func [
"Encodes a string using Move-to-Front"
str [string!] "The string to encode"
table [block!] "The initial table to use"
/local result index
][
table: copy table
result: make block! (length? str)
foreach letter str [
index: find/case table letter
append result (index? index) - 1
remove index
insert table letter
]
result
]
 
decode-mtf: func [
"Decodes a string using Move-to-Front"
indices [block!] "The encoded version of the string"
table [block!] "The initial table to use"
/local result letter
][
table: copy table
result: make string! (length? indices)
foreach index indices [
letter: pick table (index + 1)
append result letter
remove skip table index
insert table letter
]
result
]
 
block-to-string: func [
"Converts output from encode-mtf to a string"
b [block!] "The encode-mtf output"
/local res
][
res: copy ""
foreach i b [
append res to-char i
]
res
]
 
string-to-block: func [
"Converts a string to input to decode-mtf"
s [string!] "The string to convert"
/local res
][
res: copy []
foreach c s [
append res to-integer c
]
res
]
 
 
; Huffman encoding
 
construct-huffman: func [
"Constructs a Huffman tree, using the given statistics"
stats [block!] "The statistics"
/local probs symbol-table node1 node2 new-node temp-list
][
; First, make a flat list of all the characters
probs: make block! 256
for i 1 256 1 [
append/only probs reduce [pick stats i (i - 1) none]
]
 
; Then construct the tree, joining two nodes each time
symbol-table: copy probs
sort probs
while [1 < length? probs][
; Pick the two nodes with least probability
node1: first probs
node2: second probs
remove/part probs 2
; Construct a father to node1 and node2
new-node: reduce [(first node1) + (first node2) node1 node2 none]
change/only (back tail node1) new-node
change/only (back tail node2) new-node
; Insert the new node correctly in the "probs" list
temp-list: probs
while [all [not tail? temp-list (first temp-list) < new-node]][
temp-list: next temp-list
]
insert/only temp-list new-node
]
; Return the top element of the Huffman tree and the original flattened tree.
reduce [first probs symbol-table]
]
 
encode-huffman-char: func [
"Huffman-encodes a character"
symbol [char!] "Character to encode"
tree [block!] "The Huffman tree to use"
/local node code parent
][
node: pick (second tree) (to-integer symbol) + 1
code: copy ""
while [found? parent: last node][
insert code either node == third parent ["0"] ["1"]
node: parent
]
code
]
 
decode-huffman-char: func [
"Huffman-decodes a character"
code [string!] "The Huffman code to decipher (will be altered)"
tree [block!] "The Huffman tree to use"
/local node
][
node: first tree
until [
either (first code) = #"1" [node: second node] [node: third node]
remove code
(length? node) = 3
]
to-char second node
]
 
encode-huffman: func [
"Huffman-encodes a string"
str [string!] "String to encode"
tree [block!] "The Huffman tree to use"
/local result
][
result: copy ""
while [not tail? str][
append result encode-huffman-char first str tree
str: next str
]
result
]
 
decode-huffman: func [
"Huffman-decodes a string"
code [string!] "The Huffman code to decipher"
tree [block!] "The Huffman tree to use"
count [integer!] "Number of characters to decipher"
][
code: copy code
result: copy ""
loop count [
append result decode-huffman-char code tree
]
result
]
 
 
; Bit-fiddling
 
encode-integer: func [
"Converts a Rebol integer to a 4-character string"
i [integer!] "The integer to encode"
][
join "" reduce [
to-char (i / 16777216) ; 16777216 = power 2 24
to-char (i / 65536) // 256 ; 65536 = power 2 16
to-char (i / 256) // 256
to-char i // 256
]
]
 
decode-integer: func [
"Converts a 4-character string to a Rebol integer"
s [string!] "The 4-character string to decode"
][
to-integer ((to-integer first s) * 16777216) + ; 16777216 = power 2 24
((to-integer second s) * 65536) + ; 65536 = power 2 16
((to-integer third s) * 256) +
to-integer fourth s
]
 
encode-bitstream: func [
"Encodes a string of 1's and 0's to a binary string"
s [string!] "The string of 1's and 0's"
/local res byte add-this
][
res: copy ""
forever [
byte: 0
add-this: 128
while [(add-this <> 0) and (not tail? s)] [
if #"1" = first s [byte: byte + add-this]
add-this: to-integer (add-this / 2)
s: next s
]
append res to-char byte
if tail? s [return res]
]
]
 
decode-bitstream: func [
"Decodes a binary string into a string of 1's and 0's"
s [string!] "The binary string"
/local res next-bit
][
res: copy ""
next-bit: 128
while [not tail? s] [
append res either 0 = and~ next-bit to-integer first s ["0"]["1"]
next-bit: to-integer (next-bit / 2)
if next-bit = 0 [next-bit: 128 s: next s]
]
res
]
 
 
; Initialization
 
prin "Creating table for Move-to-Front functions... "
mtf-table: copy used-letters: [
#"." #"s" #"r" #"g" #"m" #"w" #"a" #"l" #"y" #"," #"e"
#"o" #"t" #" " #"n" #"k" #"W" #"d" #"h" #"i" #"f" #"x"
]
for i 0 255 1 [
letter: to-char i
if not found? find/case used-letters letter [
append mtf-table letter
]
]
print "Done."
 
prin "Constructing Huffman tree... "
huffman-tree: construct-huffman load %probabilities.r
print "Done."
 
 
; Main functions
 
compress: func [
"Compresses a string"
str [string!] "String to compress"
/local bw-string bw-index mtf-string huffman-string res
][
prin "Performing Burrows-Wheeler transformation (might take a while)... "
set [bw-string bw-index] encode-bw str
print "Done."
 
prin "Performing Move-to-Front encoding... "
mtf-string: block-to-string encode-mtf bw-string mtf-table
print "Done."
 
prin "Huffman-encoding... "
huffman-string: encode-huffman mtf-string huffman-tree
print "Done."
 
prin "Creating final representation... "
res: rejoin reduce [
encode-integer length? str
encode-integer bw-index
encode-bitstream huffman-string
]
print "Done."
res
]
 
decompress: func [
"Decompresses a string"
str [string!] "String to decompress"
/local bw-string bw-index mtf-string huffman-string res
][
prin "Analysing representation... "
str-length: decode-integer str
bw-index: decode-integer skip str 4
huffman-string: decode-bitstream skip str 8
print "Done."
 
prin "Huffman-decoding... "
mtf-string: decode-huffman huffman-string huffman-tree str-length
print "Done."
 
prin "Performing Move-to-Front decoding... "
bw-string: decode-mtf string-to-block mtf-string mtf-table
print "Done."
 
prin "Performing Burrows-Wheeler inverse transformation... "
res: decode-bw bw-string bw-index
print "Done."
res
]
]

The Probabilities.r file

Copy the following and put it in a file called Probabilities.r, situated in the same directory as where you run the file above:

3269 933 503 467 324 300 250 237 187 166 126 148 102 118 95 66 100 113 71
37 79 55 34 36 26 38 26 38 8 15 12 10 11 13 7 12 8 10 6 2 3 9 2 4 3 1 2 2
2 1 7 1 0 0 0 0 0 0 0 1 1 1 0 1 2 0 0 2 18 0 0 0 0 1 1 3 1 0 0 0 0 1 1 2
0 2 2 1 1 2 1 0 1 0 1 1 0 16 5 3 0 2 0 0 2 0 0 3 3 0 1 2 2 2 3 0 1 0 2 2
2 33 21 4 21 5 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
0 0 0 0 3 2 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0

As noted earlier, you could do your own table too, but this is a good starting point.