Compression algorithm (Compression)

Run-length Encoding (RLE)

RLE is probably the easiest way to do compression. The assumed data are as follows:

aaaaabbbcdeeeeeeef...
Copy the code

RLE then encodes it as:

5a3b1c1d7e1f...
Copy the code

When data has many duplicate bytes, RLE can save considerable space. It works fine on images.

The compression code

extension Data {
  public func compressRLE(a) -> Data {
    var data = Data(a)self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
    	var ptr = uPtr
      let end = ptr + count
      while ptr < end {
        var count = 0
        var byte = ptr.pintee
        var next = byte
      
        while next = = byte && ptr < end && count < 64 {
          ptr = ptr.advanced(by: 1)
          next = ptr.pointee
          count + = 1
        }
        
        if count > 1 || byte > = 192 {
          var size = 191 + UInt8(count)
          data.append(&size, count: 1)
          data.append(&size, count: 1)}else {
          data.append(&byte, count: 1)}}}return data
  }
}
Copy the code

Decompression code

public func decompressRLE(a) -> Data {
  var data = Data(a)self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
  	var ptr = uPtr
    let end = ptr + count
    
    while ptr < end {
      // Read the next byte. This is either a single value less than 192,
      // or the start of a byte run.
      var byte = ptr.pointee
      ptr = ptr.advanced(by: 1)
      
      if byte < 192 {
        data.append(&byte, count: 1)}else if ptr < end {
        // Read the actual data value.
        var value = ptr.pointee
        ptr = ptr.advanced(by: 1)
        
        // And write it out repeatedly.
        for _ in 0 ..< byte - 191 {
          data.append(&value, count: 1)}}}}return data
}
Copy the code

Huffman Coding

An example is the most intuitive and simple illustration

Suppose you have the following text

so much words wow many compression
Copy the code

Calculate the frequency of each byte

space: 5                   u: 1
    0: 5                   h: 1
    s: 4                   d: 1
    m: 3                   a: 1
    w: 3                   y: 1
    c: 2                   p: 1
    r: 2                   e: 1
    n: 2                   i: 1
Copy the code

You can code it like this

sapce: 5    010         u: 1    11001
    0: 5    000         h: 1    10001
    s: 4    101         d: 1    11010
    m: 3    111         a: 1    11011
    w: 3    0010        y: 1    01111
    c: 2    0011        p: 1    11000
    r: 2    1001        e: 1    01110
    n: 2    0110        i: 1    10000
Copy the code
101 000 010 111 11001 0011 10001 010 0010 000 1001 11010 101
s   o   _   m   u     c    h     _   w    o   r    d     s

010 0010 000 0010 010 111 11011 0110 01111 010 0011 000 111
_   w    o   w    _   m   a     n    y     _   c    o   m

11000 1001 01110 101 101 10000 000 0110 0
p     r    e     s   s   i     o   n
Copy the code

The use of Huffman codes on small inputs is disadvantageous.

The auxiliary code

The minimum data that NSData understands is bytes, but we’re dealing with bits, so we need to convert between the two

Npublic class BitWriter {
  public var data = NSMutableData(a)var outByte: UInt8 = 0
  var outCount = 0
  
  public func writeBit(bit: Bool) {
    if outCount = = 8 {
      data.append(&outByte, length: 1)
      outCount = 0
    }
    outByte = (outByte << 1) | (bit ? 1 : 0)
    outCount + = 1
  } 
  
  public func flush(a) {
    if outCount > 0 {
      if outCount < 8 {
        let diff = UInt8(8 - outCount)
        outByte < < = diff
      }
      data.append(&outByte, length: 1)}}}Copy the code

Used to read bits from NSData:

public class BitReader {
  var ptr: UnsafePointer<UInt8>
  var inByte: UInt8 = 0
  var inCount = 8
  
  public init(data: NSData) {
    ptr = data.bytes.assumingMemoryBound(to: UInt8.self)}public func readBit(a) -> Bool {
    if inCount = = 8 {
      inByte = ptr.pointee    // load the next byte
      inCount = 0
      ptr = ptr.successor()
    }
    let bit = inByte & 0x80   // read the next bit
    inByte < < = 1
    inCount + = 1
    return bit = = 0 ? false : true}}Copy the code
class Huffman {
  typealias NodeIndex = Int
  
  struct Node {
    var count = 0
    var index: NodeIndex = -1
    var parent: NodeIndex = -1
    var left: NodeIndex = -1
    var right: NodeIndex = -1
  }
  
  var tree = [Node](repeating: Node(), count: 256)
  
  var root: NodeIndex = -1
}
Copy the code

Calculate the frequency of each byte in the input data

fileprivate func countByteFrequency(inData data: NSData) {
  var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
  for _ in 0..<data.length {
    let i = Int(ptr.pointee)
    tree[i].count + = 1
    tree[i].index = i
    ptr = ptr.successor()
  }
}
Copy the code

Export frequency table

struct Freq {
  var byte: UInt8 = 0
  var count = 0
}

func frequencyTable(a)- > [Freq] {
  var a = [Freq] ()for i in 0..<256 where tree[i].count > 0 {
    a.append(Freq(byte: UInt8(i), count: tree[i].count))
  }
  return a
}
Copy the code

Use a priority queue

fileprivate func buildTree(a) {
  var queue = PriortyQueue<Node>(sort: { $0.count < The $1.count })
  for node in tree where node.count > 0 {
    queue.enqueue(node)
  }
  
  while queue.count > 1 {
    let node1 = queue.dequeue()!
    let node2 = queue.dequeue()!
    
    var parentNode = Node()
    parentNode.count = node1.count + node2.count
    parentNode.left = node1.index
    parentNode.right = node2.index
   	parentNode.index = tree.count
    tree.append(parentNode)
    
    tree[node1.index].parent = parentNode.index
    tree[node2.index].parent = parentNode.index
    
    queue.enqueue(parentNode)
  }
  
  let rootNode = queue.dequeue()!
  root = rootNode.index
}
Copy the code

Now that we know how to build a compression tree from the frequency table, we can use it to compress the contents of the NSData object.

public func compressData(data: NSData) -> NSData {
  countByteFrequency(inData: data)
  buildTree()
  
  let writer = BitWriter(a)var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
  for _ in 0..<data.length {
    let c = ptr.pointee
    let i = Int(c)
    traverseTree(writer: writer, nodeIndex: i, childIndex: -1)
    ptr = ptr.successor()
  }
  writer.flush()
  return writer.data
}
Copy the code

Note: Compression always involves traversing the entire input data twice: first building the frequency table and then converting the bytes into a compressed sequence of bits.

Something interesting happens in traverseTree(). This is a recursive approach:

private func traverseTree(writer: BitWriter.nodeIndex h: Int.childIndex child: Int) {
  if tree[h].parent ! = -1 {
    traverseTree(writer: writer, nodeIndex: tree[h].parent, childIndex: h)
  }
  if child ! = -1 {
    if child = = tree[h].left {
      writer.writeBit(bit: true)}else if child = = tree[h].right {
      writer.writeBit(bit: false)}}}Copy the code

Use the compressData() method like this:

let s1 = "so much words wow many compression"
if let originalData = s1.dataUsingEncoding(NSUTF8StringEncoding) {
  let huffman1 = Huffman(a)let compressedData = huffman1.compressData(originalData)
  print(compressedData.length)
}
Copy the code

unzip

First we need some methods to convert the [Freq] array back to the compression tree:

fileprivate func restoreTree(fromTable frequencyTable: [Freq]) {
  for freq in frequencyTable {
    let i = Int(freq.byte)
    tree[i].count = freq.count
    tree[i].index = i
  }
  buildTree()
}
Copy the code
func decompressData(data: NSData.frequencyTable: [Freq]) -> NSData {
  restoreTree(fromTable: frequencyTable)
  
  let reader = BitReader(data: data)
  let outData = NSMutableData(a)let byteCount = tree[root].count
  
  var i = 0
  while i < byteCount {
    var b = findLeafNode(reader: reader, nodeIndex: root)
    outData.append(&b, length: 1)
    i + = 1
  }
  return outData
}
Copy the code

Also use helper methods to traverse the tree

private func findLeafNode(reader reader: BitReader.nodeIndex: Int) -> UInt8 {
  var h = nodeIndex
  while tree[h].right ! = -1 {
    if reader.readBit() {
      h = tree[h].left
    } else {
      h = tree[h].right
    }
  }
  return UInt8(h)
}
Copy the code

How to use the decompression method:

let frequencyTable = huffman1.frequencyTable()

let huffman2 = Huffman(a)let decompressedData = huffman2.decompressData(compressedData, frequencyTable: frequencyTable: frequencyTable)

let s2 = String(data: decompressedData, encoding: NSUTF8StringEncoding)!
Copy the code