blob: d78fa93794bfcdf26aac3a52f4d8c71095f67332 [file] [log] [blame]
// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fountain
import (
"math"
"math/rand"
)
// Implemention of Online Codes. See
// http://cs.nyu.edu/web/Research/TechReports/TR2002-833/TR2002-833.pdf
// After Maymounkov and Mazieres
//
// The input message is a sequence of bytes which is split into N blocks.
//
// The constraints on the codec parameters are that 0.55*e*q*N >= q, probably
// like q*2, which implies that 0.55*e*N >= 1 or e*N >= 1.82.
//
// Taking epsilon small (i.e. 0.01), this means N > 200 or so. A more
// reasonable approach gives N > 400 or so for e=0.01.
//
// What this means for small texts is that e should be fairly big, like say
// 0.3 or something. That means 0.3*N > 4 or so, meaning N is like 12-15.
// This still allows us to get good probability of recovery. It means source
// symbols are small, but (e/2)^(q+1) can still be 10^-12 with e=0.3 if
// q is large enough (like 15).
//
// The number of code blocks expected to provide almost certain message
// recovery is (1+epsilon)(1+0.55*Q*epsilon)N where N is the number of source blocks.
//
// For large texts, the primary concern is picking N such that the packet
// size is convenient. Ideally epsilon is quite small, meaning not much extra
// data transmission required, and since there are many blocks, that can be
// done without increasing q very much.
// onlineCodec contains the parameters for application of an online code, typically
// for a particular known message.
// Recommended parameters for large NumSourceBlocks: Epsilon=0.01, Quality=3.
// Note that it requires quite large numbers (that is, thousands) of input blocks
// to approach optimality. For example, NumSourceBlocks=1000 requires about 3%
// overhead at these settings to achieve recovery error rate of 1e-8.
// Implements fountain.Codec
type onlineCodec struct {
// epsilon is the suboptimality parameter. ("Efficiency" or "e")
// A message of N blocks can be decoded with high probability
// from (1+3*epsilon)*numSourceBlocks received blocks.
epsilon float64
// quality is the decoder quality factor ("q"). This parameter influences the
// failure rate of the decoder.
// Given (1+3*epsilon)*N blocks, the algorithm will fail with probability
// (epsilon/2)^(quality+1)
quality int
// numSourceBlocks is the number of source blocks ("N") to construct from the
// input message. This parameter interacts with the message length to set the
// packet size, so should be picked with that in mind.
numSourceBlocks int
// randomSeed is a source of randomness for selecting auxiliary encoding blocks.
// This seeds a psuedorandom source identically for both encoding and decoding.
randomSeed int64
// cdf is the cumulative distribution function of the degree distribution.
cdf []float64
}
// NewOnlineCodec creates a new encoder for an Online code.
// epsilon is the suboptimality parameter. ("Efficiency" or "e")
// A message of N blocks can be decoded with high probability
// from (1+3*epsilon)*numSourceBlocks received blocks.
// quality is the decoder quality factor ("q"). This parameter influences the
// failure rate of the decoder.
// Given (1+3*epsilon)*N blocks, the algorithm will fail with probability
// (epsilon/2)^(quality+1)
// seed is the random seed used to pick auxiliary encoding blocks.
func NewOnlineCodec(sourceBlocks int, epsilon float64, quality int, seed int64) Codec {
return &onlineCodec{
epsilon: epsilon,
quality: quality,
numSourceBlocks: sourceBlocks,
randomSeed: seed,
cdf: onlineSolitonDistribution(epsilon)}
}
// SourceBlocks returns the number of source blocks into which the codec will
// partition an input message.
func (c *onlineCodec) SourceBlocks() int {
return c.numSourceBlocks
}
// numAuxBlocks returns the number of auxiliary blocks to create for the outer
// encoding.
func (c onlineCodec) numAuxBlocks() int {
// Note: equation is from the paper.
return int(math.Ceil(0.55 * float64(c.quality) * c.epsilon * float64(c.numSourceBlocks)))
}
// estimateDecodeBlocksNeeded returns a rough lower bound on the number of decode
// blocks likely needed to successfully decode a message. This number is about
// (1+epsilon)(NumSourceBlocks + numAuxBlocks)
func (c onlineCodec) estimateDecodeBlocksNeeded() int {
return int(math.Ceil((1 + c.epsilon) * float64(c.numSourceBlocks+c.numAuxBlocks())))
}
// GenerateIntermediateBlocks finds a set of auxiliary encoding blocks using an
// LT process, which it then appends to the original set of message blocks.
func (c *onlineCodec) GenerateIntermediateBlocks(message []byte, numBlocks int) []block {
src, aux := generateOuterEncoding(message, *c)
intermediate := make([]block, len(src), len(src)+len(aux))
copy(intermediate, src)
intermediate = append(intermediate, aux...)
return intermediate
}
// generateOuterEncoding creates the source and auxiliary blocks after section
// 3.1 of http://pdos.csail.mit.edu/~petar/papers/maymounkov-bigdown-lncs.ps
// Returns two slices of blocks: the original source blocks and the auxiliary
// blocks.
// Basic idea: the auxiliary blocks are randomly composed of the source blocks
// and then used to generate code blocks. This makes recovery of the full
// original message from code blocks more robust.
func generateOuterEncoding(message []byte, codec onlineCodec) ([]block, []block) {
numAuxBlocks := codec.numAuxBlocks()
long, short := partitionBytes(message, codec.numSourceBlocks)
source := equalizeBlockLengths(long, short)
aux := make([]block, numAuxBlocks)
// Ensure all aux blocks have the same length as the source blocks,
// even if they don't happen to get loaded with data.
for i := range aux {
aux[i].padding = source[0].length()
}
random := rand.New(NewMersenneTwister(codec.randomSeed))
for i := 0; i < codec.numSourceBlocks; i++ {
touchAuxBlocks := sampleUniform(random, codec.quality, numAuxBlocks)
for _, j := range touchAuxBlocks {
aux[j].xor(source[i])
}
}
return source, aux
}
// generateCodeBlock creates a new code symbol, which is the XOR of
// outer blocks [b_k1, b_k2, b_k3, ... b_kd]
// Where the sequence k1, k2, k3, ..., kd is provided in the indices.
func generateCodeBlock(source []block, aux []block, indices []int) block {
var symbol block
for _, i := range indices {
if i < len(source) {
symbol.xor(source[i])
} else {
symbol.xor(aux[i-len(source)])
}
}
return symbol
}
// PickIndices finds the source indices for a code block given an ID using
// the CDF for the online degree distribution.
func (c *onlineCodec) PickIndices(codeBlockIndex int64) []int {
random := rand.New(NewMersenneTwister(codeBlockIndex))
degree := pickDegree(random, c.cdf)
// Pick blocks from the augmented set of original+aux blocks produced
// by GenerateIntermediateBlocks.
s := sampleUniform(random, degree, c.SourceBlocks()+c.numAuxBlocks())
return s
}
// encodeOnlineBlocks creates a set of online code blocks given the ids provided.
// An easy way to generate the ids is to pick a pseudo-random sequence and then
// just grab the first M members of the sequence.
// The characteristic of an online code is that this method may be called
// repeatedly with different ids, generating different code blocks for the same
// message, all of which can then be used interchangeably by the decoder.
// For each code block, we pick a random set of outer-encoding blocks to XOR
// to compose it.
func encodeOnlineBlocks(message []byte, ids []int64, codec onlineCodec) []LTBlock {
source, aux := generateOuterEncoding(message, codec)
blocks := make([]LTBlock, len(ids))
for i := range blocks {
indices := codec.PickIndices(ids[i])
block := generateCodeBlock(source, aux, indices)
blocks[i].BlockCode = ids[i]
blocks[i].Data = make([]byte, source[0].length())
copy(blocks[i].Data, block.data)
}
return blocks
}
// onlineDecoder is the state required for decoding a particular message prepared
// with the onlineCodec. It must be initialized with the same parameters
// used for encoding, as well as the expected message length.
// Implements fountain.Decoder
type onlineDecoder struct {
codec *onlineCodec
messageLength int
// The sparse equation matrix used for decoding.
matrix sparseMatrix
}
// NewDecoder creates an online transform decoder
func (c *onlineCodec) NewDecoder(messageLength int) Decoder {
return newOnlineDecoder(c, messageLength)
}
// newOnlineDecoder creates a new decoder for a particular message. The codec
// parameters as well as the original message length must be provided. The
// decoder is only valid for decoding blocks for a particular source message.
func newOnlineDecoder(c *onlineCodec, length int) *onlineDecoder {
d := &onlineDecoder{codec: c, messageLength: length}
numAuxBlocks := c.numAuxBlocks()
d.matrix.coeff = make([][]int, c.numSourceBlocks+numAuxBlocks)
d.matrix.v = make([]block, c.numSourceBlocks+numAuxBlocks)
// Now we add the initial auxiliary equations into the decode matrix.
// These come in as synthetic decode blocks, which have value 0 and
// coefficient bits set indicating their constituent outer blocks.
auxBlockComposition := make([][]int, numAuxBlocks)
random := rand.New(NewMersenneTwister(c.randomSeed))
for i := 0; i < c.numSourceBlocks; i++ {
touchAuxBlocks := sampleUniform(random, c.quality, numAuxBlocks)
for _, j := range touchAuxBlocks {
auxBlockComposition[j] = append(auxBlockComposition[j], i)
}
}
for i := range auxBlockComposition {
auxBlockComposition[i] = append(auxBlockComposition[i], i+c.numSourceBlocks)
}
// Note: these composition slices are guaranteed sorted since we added constituent
// source blocks in order, followed by the aux block index. So we can now just
// add them to the equation matrix as if they were received.
for i := range auxBlockComposition {
d.matrix.addEquation(auxBlockComposition[i], block{})
}
return d
}
// AddBlocks adds a set of encoded blocks to the decoder. Returns true if the
// message can be fully decoded. False if there is insufficient information.
func (d *onlineDecoder) AddBlocks(blocks []LTBlock) bool {
for i := range blocks {
indices := d.codec.PickIndices(blocks[i].BlockCode)
d.matrix.addEquation(indices, block{data: blocks[i].Data})
}
return d.matrix.determined()
}
// Decode extracts the decoded message from the decoder. If the decoder does
// not have sufficient information to produce an output, returns a nil slice.
func (d *onlineDecoder) Decode() []byte {
if !d.matrix.determined() {
return nil
}
d.matrix.reduce()
lenLong, lenShort, numLong, numShort := partition(d.messageLength, d.codec.numSourceBlocks)
return d.matrix.reconstruct(d.messageLength, lenLong, lenShort, numLong, numShort)
}