Current Dev State

This commit is contained in:
Tim Lorsbach
2025-06-23 20:13:54 +02:00
parent b4f9bb277d
commit ded50edaa2
22617 changed files with 4345095 additions and 174 deletions

View File

@ -0,0 +1,53 @@
#include <nan.h>
#include <node.h>
#include <node_buffer.h>
#include <stdlib.h>
#include "./woff2/woff2_enc.h"
using namespace v8;
// convert(input: Buffer) -> Buffer
//
// Converts the TTF font held in the given Buffer to WOFF2 and returns the
// result as a new Buffer. Throws a TypeError when the first argument is not a
// Buffer, and an Error when allocation or conversion fails.
NAN_METHOD(convert) {
  // Check the raw argument before touching it; a missing argument is
  // `undefined`, which is simply "not a Buffer".
  if (!node::Buffer::HasInstance(info[0])) {
    Nan::ThrowError(Nan::TypeError("First arg should be a Buffer"));
    return;
  }
  Local<Object> inputBuffer = info[0].As<Object>();

  const size_t input_length = node::Buffer::Length(inputBuffer);
  const uint8_t* input_data =
      reinterpret_cast<const uint8_t*>(node::Buffer::Data(inputBuffer));

  // Determine the maximum needed length
  const size_t max_output_length =
      woff2::MaxWOFF2CompressedSize(input_data, input_length);
  size_t actual_output_length = max_output_length;

  uint8_t* output_data =
      static_cast<uint8_t*>(calloc(max_output_length, 1));
  if (output_data == NULL) {
    Nan::ThrowError(
        Nan::Error("Could not allocate memory for the converted font."));
    return;
  }

  // Create the Woff2 font
  if (!woff2::ConvertTTFToWOFF2(input_data, input_length,
                                output_data, &actual_output_length)) {
    // The scratch buffer is still ours on failure; release it.
    free(output_data);
    Nan::ThrowError(Nan::Error("Could not convert the given font."));
    return;
  }

  // Free the unused memory. A failed shrink leaves the original block valid,
  // so only adopt the new pointer when realloc succeeded. A zero-length
  // realloc is skipped because it may free the block and return NULL.
  if (actual_output_length > 0 && actual_output_length < max_output_length) {
    void* shrunk = realloc(output_data, actual_output_length);
    if (shrunk != NULL) {
      output_data = static_cast<uint8_t*>(shrunk);
    }
  }

  // Nan::NewBuffer adopts the malloc'ed memory, so it must not be freed here.
  Nan::MaybeLocal<v8::Object> outputBuffer = Nan::NewBuffer(
      reinterpret_cast<char*>(output_data),
      actual_output_length);
  info.GetReturnValue().Set(outputBuffer.ToLocalChecked());
}
// Module entry point: publishes the native `convert` function on the exports
// object under the name "convert".
NAN_MODULE_INIT(Init) {
  Local<String> exportName = Nan::New("convert").ToLocalChecked();
  Local<FunctionTemplate> convertTemplate =
      Nan::New<FunctionTemplate>(convert);
  Nan::Set(target, exportName,
           Nan::GetFunction(convertTemplate).ToLocalChecked());
}
NODE_MODULE(addon, Init)

View File

@ -0,0 +1,374 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find backward reference copies.
#include "./backward_references.h"
#include <algorithm>
#include <vector>
#include "./command.h"
namespace brotli {
// Scans num_bytes bytes of the ring buffer starting at stream position
// `position`, asks the hasher for backward-reference matches, and writes one
// Command per accepted match to `commands`.
//
// Template parameters:
//   Hasher         - match finder; must provide Store(), FindLongestMatch()
//                    and HasStaticDictionary().
//   kUseCostModel  - when true, score thresholds are derived from
//                    literal_cost instead of the fixed constants.
//   kUseDictionary - when true, enables the "M1" re-matching step described
//                    further down.
//
// In/out state:
//   dist_cache      - recent distances; shifted and updated when an emitted
//                     match has a distance within max_distance and a distance
//                     code greater than 1.
//   last_insert_len - number of pending literals on entry; on exit, the
//                     literals left over after the last emitted command.
//   commands        - output array, written sequentially from commands[0].
//   num_commands    - incremented by the number of commands written.
template<typename Hasher, bool kUseCostModel, bool kUseDictionary>
void CreateBackwardReferences(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const float* literal_cost,
size_t literal_cost_mask,
const size_t max_backward_limit,
const double base_min_score,
const int quality,
Hasher* hasher,
int* dist_cache,
int* last_insert_len,
Command* commands,
int* num_commands) {
if (num_bytes >= 3 && position >= 3) {
// Prepare the hashes for three last bytes of the last write.
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
position - 3);
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
position - 2);
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
position - 1);
}
const Command * const orig_commands = commands;
int insert_length = *last_insert_len;
// i indexes the ring buffer; i + i_diff recovers the absolute stream
// position that the hasher is given.
size_t i = position & ringbuffer_mask;
const int i_diff = position - i;
const size_t i_end = i + num_bytes;
// For speed up heuristics for random data.
const int random_heuristics_window_size = quality < 9 ? 64 : 512;
int apply_random_heuristics = i + random_heuristics_window_size;
double average_cost = 5.4;
if (kUseCostModel) {
// Replace the fixed default with the mean literal cost of this block.
average_cost = 0.0;
for (int k = position; k < position + num_bytes; ++k) {
average_cost += literal_cost[k & literal_cost_mask];
}
if (num_bytes > 0) {
average_cost /= num_bytes;
}
}
// An M1 match considers two consecutive copies: if moving one literal
// from the previous copy to the current one allows the current copy to be
// more efficient (because of the way the static dictionary codes words),
// the shifted match is used. M1 matching improves text compression density
// by ~0.15 %.
bool match_found_M1 = false;
int best_len_M1 = 0;
int best_len_code_M1 = 0;
int best_dist_M1 = 0;
double best_score_M1 = 0;
// Main scan; stops once fewer than four bytes remain before i_end.
while (i + 3 < i_end) {
int max_length = i_end - i;
size_t max_distance = std::min(i + i_diff, max_backward_limit);
double min_score = base_min_score;
if (kUseCostModel && insert_length < 8) {
double cost_diff[8] =
{ 0.1, 0.038, 0.019, 0.013, 0.001, 0.001, 0.001, 0.001 };
min_score += cost_diff[insert_length];
}
int best_len = 0;
int best_len_code = 0;
int best_dist = 0;
double best_score = min_score;
bool match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
literal_cost, literal_cost_mask, average_cost,
dist_cache, i + i_diff, max_length, max_distance,
&best_len, &best_len_code, &best_dist, &best_score);
if (match_found) {
if (kUseDictionary && match_found_M1 && best_score_M1 > best_score) {
// Two copies after each other. Take the last literal from the
// last copy, and use it as the first of this one.
Command prev_cmd = commands[-1];
commands[-1] = Command(prev_cmd.insert_len_,
prev_cmd.copy_len_ - 1,
prev_cmd.copy_len_ - 1,
prev_cmd.DistanceCode());
hasher->Store(ringbuffer + i, i + i_diff);
--i;
best_len = best_len_M1;
best_len_code = best_len_code_M1;
best_dist = best_dist_M1;
best_score = best_score_M1;
} else {
// Found a match. Let's look for something even better ahead.
int delayed_backward_references_in_row = 0;
for (;;) {
--max_length;
int best_len_2 = quality < 4 ? std::min(best_len - 1, max_length) : 0;
int best_len_code_2 = 0;
int best_dist_2 = 0;
double best_score_2 = min_score;
max_distance = std::min(i + i_diff + 1, max_backward_limit);
hasher->Store(ringbuffer + i, i + i_diff);
match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
literal_cost, literal_cost_mask, average_cost,
dist_cache, i + i_diff + 1, max_length, max_distance,
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
double cost_diff_lazy = 7.0;
if (kUseCostModel) {
cost_diff_lazy = 0.0;
if (best_len >= 4) {
cost_diff_lazy +=
literal_cost[(i + 4) & literal_cost_mask] - average_cost;
}
{
const int tail_length = best_len_2 - best_len + 1;
for (int k = 0; k < tail_length; ++k) {
cost_diff_lazy -=
literal_cost[(i + best_len + k) & literal_cost_mask] -
average_cost;
}
}
// If we are not inserting any symbols, inserting one is more
// expensive than if we were inserting symbols anyways.
if (insert_length < 1) {
cost_diff_lazy += 0.97;
}
// Add bias to slightly avoid lazy matching.
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
cost_diff_lazy += 0.04 * literal_cost[i & literal_cost_mask];
}
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
// Ok, let's just write one byte for now and start a match from the
// next byte.
++i;
++insert_length;
best_len = best_len_2;
best_len_code = best_len_code_2;
best_dist = best_dist_2;
best_score = best_score_2;
// Defer at most four times in a row before committing to a match.
if (++delayed_backward_references_in_row < 4) {
continue;
}
}
break;
}
}
apply_random_heuristics =
i + 2 * best_len + random_heuristics_window_size;
max_distance = std::min(i + i_diff, max_backward_limit);
// Default distance code is the distance offset by 16; the branches below
// replace it with a short code when the distance matches a cached one.
int distance_code = best_dist + 16;
if (best_dist <= max_distance) {
if (best_dist == dist_cache[0]) {
distance_code = 1;
} else if (best_dist == dist_cache[1]) {
distance_code = 2;
} else if (best_dist == dist_cache[2]) {
distance_code = 3;
} else if (best_dist == dist_cache[3]) {
distance_code = 4;
} else if (quality > 1 && best_dist >= 6) {
for (int k = 4; k < kNumDistanceShortCodes; ++k) {
int idx = kDistanceCacheIndex[k];
int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
static const int kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
if (best_dist == candidate && best_dist >= kLimits[k]) {
distance_code = k + 1;
break;
}
}
}
// Code 1 means "same as the most recent distance", so the cache only
// needs to shift for any other code.
if (distance_code > 1) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = best_dist;
}
}
Command cmd(insert_length, best_len, best_len_code, distance_code);
*commands++ = cmd;
insert_length = 0;
if (kUseDictionary) {
++i;
// Copy all copied literals to the hasher, except the last one.
// We cannot store the last one yet, otherwise we couldn't find
// the possible M1 match.
for (int j = 1; j < best_len - 1; ++j) {
if (i + 3 < i_end) {
hasher->Store(ringbuffer + i, i + i_diff);
}
++i;
}
// Prepare M1 match.
if (hasher->HasStaticDictionary() &&
best_len >= 4 && i + 20 < i_end && best_dist <= max_distance) {
max_distance = std::min(i + i_diff, max_backward_limit);
best_score_M1 = min_score;
match_found_M1 = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
literal_cost, literal_cost_mask, average_cost,
dist_cache, i + i_diff, i_end - i, max_distance,
&best_len_M1, &best_len_code_M1, &best_dist_M1, &best_score_M1);
} else {
match_found_M1 = false;
}
if (kUseCostModel) {
// This byte is just moved from the previous copy to the current,
// that is no gain.
best_score_M1 -= literal_cost[i & literal_cost_mask];
// Adjust for losing the opportunity for lazy matching.
best_score_M1 -= 3.75;
}
// Store the last one of the match.
if (i + 3 < i_end) {
hasher->Store(ringbuffer + i, i + i_diff);
}
++i;
} else {
// Put the hash keys into the table, if there are enough
// bytes left.
for (int j = 1; j < best_len; ++j) {
hasher->Store(&ringbuffer[i + j], i + i_diff + j);
}
i += best_len;
}
} else {
match_found_M1 = false;
++insert_length;
hasher->Store(ringbuffer + i, i + i_diff);
++i;
// If we have not seen matches for a long time, we can skip some
// match lookups. Unsuccessful match lookups are very very expensive
// and this kind of a heuristic speeds up compression quite
// a lot.
if (i > apply_random_heuristics) {
// Going through uncompressible data, jump.
if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
// It is quite a long time since we saw a copy, so we assume
// that this data is not compressible, and store hashes less
// often. Hashes of non compressible data are less likely to
// turn out to be useful in the future, too, so we store less of
// them to not to flood out the hash table of good compressible
// data.
int i_jump = std::min(i + 16, i_end - 4);
for (; i < i_jump; i += 4) {
hasher->Store(ringbuffer + i, i + i_diff);
insert_length += 4;
}
} else {
int i_jump = std::min(i + 8, i_end - 3);
for (; i < i_jump; i += 2) {
hasher->Store(ringbuffer + i, i + i_diff);
insert_length += 2;
}
}
}
}
}
// Bytes not covered by any command become pending literals for the caller.
insert_length += (i_end - i);
*last_insert_len = insert_length;
*num_commands += (commands - orig_commands);
}
// Runtime dispatcher: selects the hasher (and the matching cost-model /
// dictionary switches) for the given hash_type and forwards every other
// argument unchanged to the templated implementation. Hash types outside
// 1..9 do nothing.
void CreateBackwardReferences(size_t num_bytes,
                              size_t position,
                              const uint8_t* ringbuffer,
                              size_t ringbuffer_mask,
                              const float* literal_cost,
                              size_t literal_cost_mask,
                              const size_t max_backward_limit,
                              const double base_min_score,
                              const int quality,
                              Hashers* hashers,
                              int hash_type,
                              int* dist_cache,
                              int* last_insert_len,
                              Command* commands,
                              int* num_commands) {
  switch (hash_type) {
    case 1:
      CreateBackwardReferences<Hashers::H1, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h1.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 2:
      CreateBackwardReferences<Hashers::H2, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h2.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 3:
      CreateBackwardReferences<Hashers::H3, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h3.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 4:
      CreateBackwardReferences<Hashers::H4, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h4.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 5:
      CreateBackwardReferences<Hashers::H5, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h5.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 6:
      CreateBackwardReferences<Hashers::H6, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h6.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 7:
      CreateBackwardReferences<Hashers::H7, false, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h7.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 8:
      // The only configuration that enables both the cost model and the
      // dictionary (M1) path.
      CreateBackwardReferences<Hashers::H8, true, true>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h8.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    case 9:
      CreateBackwardReferences<Hashers::H9, true, false>(
          num_bytes, position, ringbuffer, ringbuffer_mask, literal_cost,
          literal_cost_mask, max_backward_limit, base_min_score, quality,
          hashers->hash_h9.get(), dist_cache, last_insert_len, commands,
          num_commands);
      return;
    default:
      return;
  }
}
} // namespace brotli

View File

@ -0,0 +1,46 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find backward reference copies.
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <stdint.h>
#include <vector>
#include "./hash.h"
#include "./command.h"
namespace brotli {
// Finds backward references in num_bytes bytes of the ring buffer starting at
// `position` and writes the resulting commands to `commands`.
//
// hash_type selects which member of `hashers` performs the matching; values
// 1 through 9 are handled and any other value leaves all outputs untouched.
// dist_cache and *last_insert_len are read and updated, and *num_commands is
// incremented by the number of commands written.
void CreateBackwardReferences(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const float* literal_cost,
size_t literal_cost_mask,
const size_t max_backward_limit,
const double base_min_score,
const int quality,
Hashers* hashers,
int hash_type,
int* dist_cache,
int* last_insert_len,
Command* commands,
int* num_commands);
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

View File

@ -0,0 +1,168 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to estimate the bit cost of Huffman trees.
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include <stdint.h>
#include "./entropy_encode.h"
#include "./fast_log.h"
namespace brotli {
// Returns sum * log2(sum) - SUM(p * log2(p)) over the `size` counts in
// `population`, clamped from below to the total count (at least one bit per
// symbol occurrence).
static inline double BitsEntropy(const int *population, int size) {
  const int* cursor = population;
  const int* const end = population + size;
  int total = 0;
  double bits = 0;

  // Peel off one element when the count is odd so that the main loop can
  // consume the remainder two at a time.
  if (size & 1) {
    const int count = *cursor++;
    total += count;
    bits -= count * FastLog2(count);
  }
  while (cursor < end) {
    const int first = *cursor++;
    total += first;
    bits -= first * FastLog2(first);
    const int second = *cursor++;
    total += second;
    bits -= second * FastLog2(second);
  }

  if (total) {
    bits += total * FastLog2(total);
  }
  // At least one bit per literal is needed.
  if (bits < total) {
    bits = total;
  }
  return bits;
}
// Extra bits added per code-length symbol when costing a Huffman tree:
// zero for symbols 0..15, two for symbol 16 and three for symbol 17.
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
};
// Total bit cost of a code-length histogram: each symbol contributes its
// count times (code depth + that symbol's extra bits).
static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
  int total_bits = 0;
  int symbol = 0;
  while (symbol < kCodeLengthCodes) {
    const int bits_per_use = depth[symbol] + kHuffmanExtraBits[symbol];
    total_bits += counts[symbol] * bits_per_use;
    ++symbol;
  }
  return total_bits;
}
static inline int HuffmanTreeBitCost(
const Histogram<kCodeLengthCodes>& histogram,
const EntropyCode<kCodeLengthCodes>& entropy) {
return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
}
// Estimates the number of bits needed to store the Huffman code whose
// per-symbol depths are given in depth[0..length).
//
// The depths are run-length compacted into a histogram of code-length
// symbols (0..15 literal depths, 16 = repeat previous non-zero depth,
// 17 = repeat zero), a Huffman code limited to depth 7 is built over that
// histogram, and the resulting cost is returned together with a fixed
// overhead of 18 + 2 * max_depth bits.
static inline int HuffmanBitCost(const uint8_t* depth, int length) {
  int max_depth = 1;
  int histogram[kCodeLengthCodes] = { 0 };
  int prev_value = 8;
  // compute histogram of compacted huffman tree
  for (int i = 0; i < length;) {
    const int value = depth[i];
    if (value > max_depth) {
      max_depth = value;
    }
    // Length of the run of identical depths starting at i.
    int reps = 1;
    for (int k = i + 1; k < length && depth[k] == value; ++k) {
      ++reps;
    }
    i += reps;
    // A trailing run of zeros is not counted at all.
    if (i == length && value == 0)
      break;
    if (value == 0) {
      if (reps < 3) {
        histogram[0] += reps;
      } else {
        // Long zero runs are counted as repeat-zero symbols (17).
        reps -= 2;
        while (reps > 0) {
          ++histogram[17];
          reps >>= 3;
        }
      }
    } else {
      // The first element of the run is counted literally unless it equals
      // the previously counted non-zero depth.
      if (value != prev_value) {
        ++histogram[value];
        --reps;
      }
      prev_value = value;
      if (reps < 3) {
        histogram[value] += reps;
      } else {
        // Long runs are counted as repeat-previous symbols (16).
        reps -= 2;
        while (reps > 0) {
          ++histogram[16];
          reps >>= 2;
        }
      }
    }
  }
  // create huffman tree of huffman tree
  uint8_t cost[kCodeLengthCodes] = { 0 };
  CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
  // account for rle extra bits
  cost[16] += 2;
  cost[17] += 3;
  int bits = 18 + 2 * max_depth;  // huffman tree of huffman tree cost
  for (int i = 0; i < kCodeLengthCodes; ++i) {
    bits += histogram[i] * cost[i];  // huffman tree bit cost
  }
  return bits;
}
// Estimates the number of bits needed to encode the symbols counted in
// `histogram`, including the cost of describing the code itself.
//
// Histograms with at most four distinct symbols use fixed header costs;
// larger alphabets add the HuffmanBitCost of the depth array.
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
  if (histogram.total_count_ == 0) {
    return 12;
  }

  // Count distinct symbols, but stop at five: only 1, 2, 3, 4 and
  // "more than 4" are distinguished below.
  int used_symbols = 0;
  for (int i = 0; i < kSize; ++i) {
    if (histogram.data_[i] > 0 && ++used_symbols == 5) {
      break;
    }
  }

  switch (used_symbols) {
    case 1:
      return 12;
    case 2:
      return 20 + histogram.total_count_;
    default:
      break;
  }

  uint8_t depth[kSize] = { 0 };
  CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);

  // Payload cost: every occurrence of a symbol costs its code depth.
  int bits = 0;
  for (int symbol = 0; symbol < kSize; ++symbol) {
    bits += histogram.data_[symbol] * depth[symbol];
  }

  // Header cost of transmitting the code.
  switch (used_symbols) {
    case 3:
      bits += 28;
      break;
    case 4:
      bits += 37;
      break;
    default:
      bits += HuffmanBitCost(depth, kSize);
      break;
  }
  return bits;
}
} // namespace brotli
#endif // BROTLI_ENC_BIT_COST_H_

View File

@ -0,0 +1,402 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Block split point selection utilities.
#include "./block_splitter.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <map>
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
// Tuning parameters for block splitting. SplitBlock passes them to
// SplitByteVector per symbol category (literals, commands, distances).

// Upper bound on the number of initial histograms. The distance split reuses
// the command limit.
static const int kMaxLiteralHistograms = 100;
static const int kMaxCommandHistograms = 50;
// Cost, in bits, charged for switching block type inside FindBlocks.
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
// Number of consecutive symbols per histogram sample. The distance split
// reuses the command stride.
static const int kLiteralStrideLength = 70;
static const int kCommandStrideLength = 40;
// Input symbols per initial histogram; determines how many histograms are
// created before the maximum above is applied.
static const int kSymbolsPerLiteralHistogram = 544;
static const int kSymbolsPerCommandHistogram = 530;
static const int kSymbolsPerDistanceHistogram = 544;
// Inputs shorter than this are emitted as a single block without splitting.
static const int kMinLengthForBlockSplitting = 128;
// Number of refinement samples is
// kIterMulForRefining * length / stride + kMinItersForRefining, rounded up
// to a multiple of the histogram count (see RefineEntropyCodes).
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
std::vector<uint8_t>* literals) {
// Count how many we have.
size_t total_length = 0;
for (int i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
if (total_length == 0) {
return;
}
// Allocate.
literals->resize(total_length);
// Loop again, and copy this time.
size_t pos = 0;
size_t from_pos = 0;
for (int i = 0; i < num_commands && pos < total_length; ++i) {
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_len_);
pos += cmds[i].insert_len_;
from_pos += cmds[i].insert_len_ + cmds[i].copy_len_;
}
}
void CopyCommandsToByteArray(const Command* cmds,
const size_t num_commands,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint8_t>* distance_prefixes) {
for (int i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
distance_prefixes->push_back(cmd.dist_prefix_);
}
}
}
// Minimal multiplicative pseudo-random generator: multiplies the state by
// 16807 (with unsigned wrap-around), replaces a zero state by 1 so the
// sequence cannot get stuck, stores the new state and returns it.
inline static unsigned int MyRand(unsigned int* seed) {
  const unsigned int next = *seed * 16807U;
  *seed = (next == 0) ? 1U : next;
  return *seed;
}
// Seeds `vec` with initial histograms. The input is divided into
// min(length / literals_per_histogram + 1, max_histograms) equal sections;
// for each one a histogram of `stride` consecutive symbols is appended. The
// sample of the first section starts at offset 0, later samples start at a
// pseudo-random offset inside their section, and a start too close to the
// end is pulled back to length - stride - 1.
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
                         int literals_per_histogram,
                         int max_histograms,
                         size_t stride,
                         std::vector<HistogramType>* vec) {
  int num_histograms = length / literals_per_histogram + 1;
  num_histograms = std::min(num_histograms, max_histograms);

  const int section_length = length / num_histograms;
  unsigned int rng_state = 7;
  for (int h = 0; h < num_histograms; ++h) {
    int start = length * h / num_histograms;
    if (h > 0) {
      start += MyRand(&rng_state) % section_length;
    }
    if (start + stride >= length) {
      start = length - stride - 1;
    }
    HistogramType sample;
    sample.Add(data + start, stride);
    vec->push_back(sample);
  }
}
// Adds `stride` consecutive symbols, starting at a pseudo-random offset of
// `data`, to `sample`. When the input is not longer than the stride, the
// whole input is added instead and the generator is not advanced.
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
                  const DataType* data,
                  size_t length,
                  size_t stride,
                  HistogramType* sample) {
  size_t offset = 0;
  if (stride < length) {
    // Any start position that keeps the full stride inside the input.
    offset = MyRand(seed) % (length - stride + 1);
  } else {
    stride = length;
  }
  sample->Add(data + offset, stride);
}
// Refines the histograms in `vec` by adding pseudo-random samples of the
// input to them in round-robin order. The number of samples is rounded up to
// a multiple of the histogram count, so every histogram receives the same
// number of samples.
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
                        size_t stride,
                        std::vector<HistogramType>* vec) {
  const size_t num_codes = vec->size();
  int iters =
      kIterMulForRefining * length / stride + kMinItersForRefining;
  iters = ((iters + num_codes - 1) / num_codes) * num_codes;

  unsigned int rng_state = 7;
  for (int round = 0; round < iters; ++round) {
    HistogramType sample;
    RandomSample(&rng_state, data, length, stride, &sample);
    (*vec)[round % num_codes].AddHistogram(sample);
  }
}
// Estimated cost, in bits, of a symbol seen `count` times out of `total`:
// log2(total) - log2(count), or log2(total) + 2 for a symbol never seen.
inline static float BitCost(int total, int count) {
  if (count == 0) {
    return FastLog2(total) + 2;
  }
  return FastLog2(total) - FastLog2(count);
}
// Assigns to every input symbol the index of the histogram (entropy code)
// that should encode it, choosing block boundaries so that the coding cost
// plus a per-switch penalty is kept low.
//
//   data, length          - input symbols.
//   block_switch_bitcost  - penalty, in bits, for switching entropy codes.
//   vec                   - candidate histograms; indices into it are the
//                           block ids written to the output.
//   block_id              - output, one id per input symbol; must have room
//                           for `length` entries.
//
// With zero or one histogram every symbol gets id 0. An empty input is a
// no-op.
template<typename DataType, int kSize>
void FindBlocks(const DataType* data, const size_t length,
                const double block_switch_bitcost,
                const std::vector<Histogram<kSize> > &vec,
                uint8_t *block_id) {
  if (length == 0) {
    // Nothing to label; also keeps the trace-back below from reading
    // block_id[length - 1] out of bounds.
    return;
  }
  if (vec.size() <= 1) {
    for (size_t i = 0; i < length; ++i) {
      block_id[i] = 0;
    }
    return;
  }
  const size_t vecsize = vec.size();

  // insert_cost[symbol * vecsize + k]: cost of coding `symbol` with code k.
  // Vectors own the scratch memory, so nothing leaks if an allocation throws.
  std::vector<double> insert_cost(kSize * vecsize);
  for (int i = 0; i < kSize; ++i) {
    for (size_t j = 0; j < vecsize; ++j) {
      insert_cost[i * vecsize + j] =
          BitCost(vec[j].total_count_, vec[j].data_[i]);
    }
  }
  std::vector<double> cost(vecsize, 0.0);
  // One flag per (position, code) pair; uint8_t rather than bool to avoid
  // the bit-packed std::vector<bool> specialization.
  std::vector<uint8_t> switch_signal(length * vecsize, 0);

  // After each iteration of this loop, cost[k] will contain the difference
  // between the minimum cost of arriving at the current byte position using
  // entropy code k, and the minimum cost of arriving at the current byte
  // position. This difference is capped at the block switch cost, and if it
  // reaches block switch cost, it means that when we trace back from the last
  // position, we need to switch here.
  for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
    const size_t ix = byte_ix * vecsize;
    const size_t insert_cost_ix = data[byte_ix] * vecsize;
    double min_cost = 1e99;
    for (size_t k = 0; k < vecsize; ++k) {
      // We are coding the symbol in data[byte_ix] with entropy code k.
      cost[k] += insert_cost[insert_cost_ix + k];
      if (cost[k] < min_cost) {
        min_cost = cost[k];
        block_id[byte_ix] = static_cast<uint8_t>(k);
      }
    }
    double block_switch_cost = block_switch_bitcost;
    // More blocks for the beginning.
    if (byte_ix < 2000) {
      block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
    }
    for (size_t k = 0; k < vecsize; ++k) {
      cost[k] -= min_cost;
      if (cost[k] >= block_switch_cost) {
        cost[k] = block_switch_cost;
        switch_signal[ix + k] = 1;
      }
    }
  }

  // Now trace back from the last position and switch at the marked places.
  size_t byte_ix = length - 1;
  size_t ix = byte_ix * vecsize;
  int cur_id = block_id[byte_ix];
  while (byte_ix > 0) {
    --byte_ix;
    ix -= vecsize;
    if (switch_signal[ix + cur_id]) {
      cur_id = block_id[byte_ix];
    }
    block_id[byte_ix] = cur_id;
  }
}
// Renumbers the ids in block_ids[0..length) in place so that they form the
// dense range 0..n-1, numbered in order of first appearance, and returns n
// (the number of distinct ids).
int RemapBlockIds(uint8_t* block_ids, const size_t length) {
  // Ids are single bytes, so a fixed table replaces an associative lookup.
  static const int kUnassigned = -1;
  int new_id[256];
  for (int k = 0; k < 256; ++k) {
    new_id[k] = kUnassigned;
  }

  int next_id = 0;
  for (size_t i = 0; i < length; ++i) {
    const uint8_t old_id = block_ids[i];
    if (new_id[old_id] == kUnassigned) {
      new_id[old_id] = next_id;
      ++next_id;
    }
    // The mapping of an id is final once assigned, so the entry can be
    // rewritten in the same pass.
    block_ids[i] = static_cast<uint8_t>(new_id[old_id]);
  }
  return next_id;
}
// Rebuilds `histograms` from a block-id labelling: the ids are first remapped
// to a dense range (in place), then one histogram per id is filled with the
// symbols labelled with that id.
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
                          uint8_t* block_ids,
                          std::vector<HistogramType>* histograms) {
  const int type_count = RemapBlockIds(block_ids, length);
  histograms->clear();
  histograms->resize(type_count);

  std::vector<HistogramType>& by_type = *histograms;
  for (size_t pos = 0; pos < length; ++pos) {
    const uint8_t type = block_ids[pos];
    by_type[type].Add(data[pos]);
  }
}
// Builds one histogram per run of equal block ids, clusters those histograms
// into at most 256 groups, and rewrites every block id with the cluster
// index of the run it belongs to.
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
                   uint8_t* block_ids) {
  std::vector<HistogramType> run_histograms;
  std::vector<int> run_of_symbol(length);

  // Split the input into maximal runs of identical ids.
  HistogramType running;
  int run_count = 0;
  for (size_t i = 0; i < length; ++i) {
    run_of_symbol[i] = run_count;
    running.Add(data[i]);
    const bool run_ends_here =
        (i + 1 == length) || (block_ids[i] != block_ids[i + 1]);
    if (run_ends_here) {
      run_histograms.push_back(running);
      running.Clear();
      ++run_count;
    }
  }

  std::vector<HistogramType> clustered_histograms;
  std::vector<int> cluster_of_run;
  // Block ids need to fit in one byte.
  static const int kMaxNumberOfBlockTypes = 256;
  ClusterHistograms(run_histograms, 1, run_histograms.size(),
                    kMaxNumberOfBlockTypes,
                    &clustered_histograms,
                    &cluster_of_run);

  for (size_t i = 0; i < length; ++i) {
    block_ids[i] = cluster_of_run[run_of_symbol[i]];
  }
}
// Converts a per-symbol block-id labelling into run-length form: for every
// maximal run of equal ids, the id is appended to split->types and the run
// length to split->lengths. split->num_types is set to the largest id plus
// one.
//
// An empty labelling appends nothing and reports a single block type, the
// same result SplitByteVector produces for empty input.
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
  if (block_ids.empty()) {
    // Guard: the code below reads block_ids[0].
    split->num_types = 1;
    return;
  }
  int cur_id = block_ids[0];
  int cur_length = 1;
  split->num_types = -1;
  for (size_t i = 1; i < block_ids.size(); ++i) {
    if (block_ids[i] != cur_id) {
      // Run finished: record it and start counting the next one.
      split->types.push_back(cur_id);
      split->lengths.push_back(cur_length);
      split->num_types = std::max(split->num_types, cur_id);
      cur_id = block_ids[i];
      cur_length = 0;
    }
    ++cur_length;
  }
  // Record the final run.
  split->types.push_back(cur_id);
  split->lengths.push_back(cur_length);
  split->num_types = std::max(split->num_types, cur_id);
  // Convert "largest id seen" into "number of ids".
  ++split->num_types;
}
// Computes a block split for one symbol stream.
//
// Empty input yields a split with one type and no blocks; input shorter than
// kMinLengthForBlockSplitting yields a single block of type 0. Otherwise
// histograms are sampled and refined, the labelling / histogram rebuild is
// iterated ten times, the resulting blocks are clustered, and the final
// labelling is converted to run-length form in `split`.
template<typename HistogramType, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
                     const int literals_per_histogram,
                     const int max_histograms,
                     const int sampling_stride_length,
                     const double block_switch_cost,
                     BlockSplit* split) {
  if (data.empty()) {
    split->num_types = 1;
    return;
  }
  if (data.size() < kMinLengthForBlockSplitting) {
    split->num_types = 1;
    split->types.push_back(0);
    split->lengths.push_back(data.size());
    return;
  }

  const DataType* const symbols = data.data();
  const size_t symbol_count = data.size();

  // Find good entropy codes.
  std::vector<HistogramType> histograms;
  InitialEntropyCodes(symbols, symbol_count,
                      literals_per_histogram,
                      max_histograms,
                      sampling_stride_length,
                      &histograms);
  RefineEntropyCodes(symbols, symbol_count,
                     sampling_stride_length,
                     &histograms);

  // Find a good path through literals with the good entropy codes.
  std::vector<uint8_t> block_ids(symbol_count);
  uint8_t* const ids = &block_ids[0];
  for (int round = 0; round < 10; ++round) {
    FindBlocks(symbols, symbol_count, block_switch_cost, histograms, ids);
    BuildBlockHistograms(symbols, symbol_count, ids, &histograms);
  }
  ClusterBlocks<HistogramType>(symbols, symbol_count, ids);
  BuildBlockSplit(block_ids, split);
}
// Computes independent block splits for the three symbol streams of a
// command sequence: literals, insert-and-copy command codes, and distance
// prefixes.
void SplitBlock(const Command* cmds,
                const size_t num_commands,
                const uint8_t* data,
                BlockSplit* literal_split,
                BlockSplit* insert_and_copy_split,
                BlockSplit* dist_split) {
  // Create a continuous array of literals.
  std::vector<uint8_t> literal_bytes;
  CopyLiteralsToByteArray(cmds, num_commands, data, &literal_bytes);

  // Compute prefix codes for commands.
  std::vector<uint16_t> command_codes;
  std::vector<uint8_t> distance_codes;
  CopyCommandsToByteArray(cmds, num_commands,
                          &command_codes, &distance_codes);

  SplitByteVector<HistogramLiteral>(literal_bytes,
                                    kSymbolsPerLiteralHistogram,
                                    kMaxLiteralHistograms,
                                    kLiteralStrideLength,
                                    kLiteralBlockSwitchCost,
                                    literal_split);
  SplitByteVector<HistogramCommand>(command_codes,
                                    kSymbolsPerCommandHistogram,
                                    kMaxCommandHistograms,
                                    kCommandStrideLength,
                                    kCommandBlockSwitchCost,
                                    insert_and_copy_split);
  // The distance stream uses the command limits for histogram count and
  // stride, with its own symbols-per-histogram and switch cost.
  SplitByteVector<HistogramDistance>(distance_codes,
                                     kSymbolsPerDistanceHistogram,
                                     kMaxCommandHistograms,
                                     kCommandStrideLength,
                                     kDistanceBlockSwitchCost,
                                     dist_split);
}
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks) {
int num_blocks = input_size / target_length + 1;
int length_limit = input_size / num_blocks + 1;
int total_length = 0;
std::vector<Command> cur_block;
for (int i = 0; i < num_commands; ++i) {
const Command& cmd = all_commands[i];
int cmd_length = cmd.insert_len_ + cmd.copy_len_;
if (total_length > length_limit) {
blocks->push_back(cur_block);
cur_block.clear();
total_length = 0;
}
cur_block.push_back(cmd);
total_length += cmd_length;
}
blocks->push_back(cur_block);
}
} // namespace brotli

View File

@ -0,0 +1,74 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Block split point selection utilities.
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
#include "./metablock.h"
namespace brotli {
// Walks a BlockSplit one symbol at a time, tracking the index and type of
// the current block and how many symbols remain in it. The iterator keeps a
// reference to the split, so the split must outlive it.
struct BlockSplitIterator {
  // Starts at block 0 with type 0; the remaining length is taken from the
  // first block when there is one, and is 0 otherwise.
  explicit BlockSplitIterator(const BlockSplit& split)
      : split_(split),
        idx_(0),
        type_(0),
        length_(split.lengths.empty() ? 0 : split.lengths[0]) {}

  // Consumes one symbol. When the current block is exhausted, moves on to
  // the next block first and consumes the symbol from that one.
  void Next() {
    if (length_ != 0) {
      --length_;
      return;
    }
    ++idx_;
    type_ = split_.types[idx_];
    length_ = split_.lengths[idx_];
    --length_;
  }

  const BlockSplit& split_;
  int idx_;
  int type_;
  int length_;
};
// Collects the literal (insert) bytes of all commands into `literals`.
// `data` is read linearly, skipping copy_len_ bytes after each command's
// literals. `literals` is left untouched when there are no literals.
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
std::vector<uint8_t>* literals);
// Computes separate block splits for the literals, the insert-and-copy
// command codes and the distance prefixes of the given commands.
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
// Partitions the commands into consecutive groups by the number of bytes
// they cover, appending each group to `blocks`. The per-group limit is
// derived from input_size and target_length.
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_

View File

@ -0,0 +1,830 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Brotli bit stream functions to support the low level format. There are no
// compression algorithms here, just the right ordering of bits to match the
// specs.
#include "./brotli_bit_stream.h"
#include <algorithm>
#include <limits>
#include <vector>
#include "./bit_cost.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
// Computes the bit fields that describe a meta-block length.
//   *bits        receives length - 1 (the stream stores MLEN - 1),
//   *numbits     receives the width of that field, a multiple of 4 (>= 16),
//   *nibblesbits receives the 2-bit MNIBBLES code (number of nibbles minus 4).
// Returns false, leaving the outputs untouched, when length - 1 needs more
// than 24 bits.
bool EncodeMlen(size_t length, int* bits, int* numbits, int* nibblesbits) {
  const size_t encoded = length - 1;

  int bit_width = 1;
  if (encoded != 0) {
    bit_width = Log2Floor(encoded) + 1;
  }
  if (bit_width > 24) {
    return false;
  }

  // At least four nibbles are always used; wider values round up to the next
  // whole nibble.
  int nibble_count = 4;
  if (bit_width >= 16) {
    nibble_count = (bit_width + 3) / 4;
  }

  *nibblesbits = nibble_count - 4;
  *numbits = nibble_count * 4;
  *bits = encoded;
  return true;
}
// Writes n with a variable-length code: zero is a single 0 bit; any other
// value is a 1 bit, a 3-bit field holding Log2Floor(n), and then that many
// bits holding n minus the corresponding power of two.
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
  if (n != 0) {
    WriteBits(1, 1, storage_ix, storage);
    const int magnitude = Log2Floor(n);
    WriteBits(3, magnitude, storage_ix, storage);
    WriteBits(magnitude, n - (1 << magnitude), storage_ix, storage);
    return;
  }
  WriteBits(1, 0, storage_ix, storage);
}
// Writes the header of a compressed meta-block: ISLAST, then (last block only)
// ISEMPTY, then the MNIBBLES code and MLEN - 1, and finally (non-last blocks
// only) a cleared ISUNCOMPRESSED bit.
// Returns true on success. Returns false when a non-final block is empty or
// when the length cannot be encoded; bits written before the failure was
// detected stay in the stream.
bool StoreCompressedMetaBlockHeader(bool final_block,
                                    size_t length,
                                    int* storage_ix,
                                    uint8_t* storage) {
  const bool is_empty = (length == 0);

  // ISLAST bit.
  WriteBits(1, final_block, storage_ix, storage);
  if (final_block) {
    // ISEMPTY bit, present only on the last meta-block.
    WriteBits(1, is_empty, storage_ix, storage);
  }
  if (is_empty) {
    // Only the last meta-block can be empty.
    return final_block;
  }

  int mlen_value;
  int mlen_width;
  int mnibbles_code;
  if (!EncodeMlen(length, &mlen_value, &mlen_width, &mnibbles_code)) {
    return false;
  }
  WriteBits(2, mnibbles_code, storage_ix, storage);
  WriteBits(mlen_width, mlen_value, storage_ix, storage);

  if (!final_block) {
    // ISUNCOMPRESSED bit (0: this meta-block is compressed).
    WriteBits(1, 0, storage_ix, storage);
  }
  return true;
}
// Writes the header of an uncompressed meta-block: a cleared ISLAST bit (an
// uncompressed block cannot be the last one), the MNIBBLES code, MLEN - 1 and
// a set ISUNCOMPRESSED bit.
// Returns false when the length cannot be encoded; the ISLAST bit has already
// been written in that case.
bool StoreUncompressedMetaBlockHeader(size_t length,
                                      int* storage_ix,
                                      uint8_t* storage) {
  // ISLAST bit, always 0 here.
  WriteBits(1, 0, storage_ix, storage);

  int mlen_value;
  int mlen_width;
  int mnibbles_code;
  const bool encodable =
      EncodeMlen(length, &mlen_value, &mlen_width, &mnibbles_code);
  if (encodable) {
    WriteBits(2, mnibbles_code, storage_ix, storage);
    WriteBits(mlen_width, mlen_value, storage_ix, storage);
    // ISUNCOMPRESSED bit.
    WriteBits(1, 1, storage_ix, storage);
  }
  return encodable;
}
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
int *storage_ix,
uint8_t *storage) {
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
// The bit lengths of the Huffman code over the code length alphabet
// are compressed with the following static Huffman code:
// Symbol Code
// ------ ----
// 0 00
// 1 1110
// 2 110
// 3 01
// 4 10
// 5 1111
static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
0, 7, 3, 2, 1, 15
};
static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {
2, 4, 3, 2, 2, 4
};
// Throw away trailing zeros:
int codes_to_store = kCodeLengthCodes;
if (num_codes > 1) {
for (; codes_to_store > 0; --codes_to_store) {
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
break;
}
}
}
int skip_some = 0; // skips none.
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0) {
skip_some = 2; // skips two.
if (code_length_bitdepth[kStorageOrder[2]] == 0) {
skip_some = 3; // skips three.
}
}
WriteBits(2, skip_some, storage_ix, storage);
for (int i = skip_some; i < codes_to_store; ++i) {
uint8_t l = code_length_bitdepth[kStorageOrder[i]];
WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
}
}
// Writes the run-length coded Huffman tree: every entry of huffman_tree is
// emitted with the code-length code (depth/bits tables), and entries 16 and 17
// are followed by 2 respectively 3 extra bits taken from
// huffman_tree_extra_bits at the same position.
void StoreHuffmanTreeToBitMask(
    const std::vector<uint8_t> &huffman_tree,
    const std::vector<uint8_t> &huffman_tree_extra_bits,
    const uint8_t *code_length_bitdepth,
    const std::vector<uint16_t> &code_length_bitdepth_symbols,
    int * __restrict storage_ix,
    uint8_t * __restrict storage) {
  const size_t entry_count = huffman_tree.size();
  for (size_t pos = 0; pos < entry_count; ++pos) {
    const int symbol = huffman_tree[pos];
    WriteBits(code_length_bitdepth[symbol],
              code_length_bitdepth_symbols[symbol],
              storage_ix, storage);
    // Only the two repeat symbols carry extra bits.
    if (symbol == 16) {
      WriteBits(2, huffman_tree_extra_bits[pos], storage_ix, storage);
    } else if (symbol == 17) {
      WriteBits(3, huffman_tree_extra_bits[pos], storage_ix, storage);
    }
  }
}
// Stores a "simple" Huffman code: a 2-bit marker with value 1, NSYM - 1 in two
// bits, and then the symbols themselves with max_bits bits each, ordered by
// increasing depth. When four symbols are written, one extra tree-select bit
// follows; it is 1 when the first (shallowest) symbol has depth 1.
// Note that the symbols array is reordered in place.
void StoreSimpleHuffmanTree(const uint8_t* depths,
                            int symbols[4],
                            int num_symbols,
                            int max_bits,
                            int *storage_ix, uint8_t *storage) {
  // Marker: value of 1 indicates a simple Huffman code.
  WriteBits(2, 1, storage_ix, storage);
  // NSYM - 1.
  WriteBits(2, num_symbols - 1, storage_ix, storage);

  // Exchange sort of the symbols by depth.
  for (int a = 0; a < num_symbols; a++) {
    for (int b = a + 1; b < num_symbols; b++) {
      if (depths[symbols[b]] < depths[symbols[a]]) {
        const int held = symbols[b];
        symbols[b] = symbols[a];
        symbols[a] = held;
      }
    }
  }

  // Two or three symbols are written as-is; every other count takes the
  // four-symbol form.
  const int symbols_to_write =
      (num_symbols == 2 || num_symbols == 3) ? num_symbols : 4;
  for (int k = 0; k < symbols_to_write; ++k) {
    WriteBits(max_bits, symbols[k], storage_ix, storage);
  }
  if (symbols_to_write == 4) {
    // tree-select
    WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
  }
}
// num = alphabet size
// depths = symbol depths
void StoreHuffmanTree(const uint8_t* depths, size_t num,
int *storage_ix, uint8_t *storage) {
// Write the Huffman tree into the brotli-representation.
std::vector<uint8_t> huffman_tree;
std::vector<uint8_t> huffman_tree_extra_bits;
// TODO: Consider allocating these from stack.
huffman_tree.reserve(256);
huffman_tree_extra_bits.reserve(256);
WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
// Calculate the statistics of the Huffman tree in brotli-representation.
int huffman_tree_histogram[kCodeLengthCodes] = { 0 };
for (int i = 0; i < huffman_tree.size(); ++i) {
++huffman_tree_histogram[huffman_tree[i]];
}
int num_codes = 0;
int code = 0;
for (int i = 0; i < kCodeLengthCodes; ++i) {
if (huffman_tree_histogram[i]) {
if (num_codes == 0) {
code = i;
num_codes = 1;
} else if (num_codes == 1) {
num_codes = 2;
break;
}
}
}
// Calculate another Huffman tree to use for compressing both the
// earlier Huffman tree with.
// TODO: Consider allocating these from stack.
uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
std::vector<uint16_t> code_length_bitdepth_symbols(kCodeLengthCodes);
CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
5, &code_length_bitdepth[0]);
ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
code_length_bitdepth_symbols.data());
// Now, we have all the data, let's start storing it
StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
storage_ix, storage);
if (num_codes == 1) {
code_length_bitdepth[code] = 0;
}
// Store the real huffman tree now.
StoreHuffmanTreeToBitMask(huffman_tree,
huffman_tree_extra_bits,
&code_length_bitdepth[0],
code_length_bitdepth_symbols,
storage_ix, storage);
}
// Builds a Huffman code for histogram[0..length) into depth/bits and writes
// its description to the bit stream.
//   - At most one used symbol: a 4-bit value of 1 followed by that symbol in
//     max_bits bits; depth and bits are left untouched.
//   - Two to four used symbols: the simple Huffman code form.
//   - Otherwise: the complex (run-length coded) form.
// max_bits is the number of bits needed to represent length - 1.
void BuildAndStoreHuffmanTree(const int *histogram,
                              const int length,
                              uint8_t* depth,
                              uint16_t* bits,
                              int* storage_ix,
                              uint8_t* storage) {
  // Collect the first four used symbols; counting stops once it is clear
  // that there are more than four.
  int count = 0;
  int s4[4] = { 0 };
  for (size_t sym = 0; sym < length; sym++) {
    if (!histogram[sym]) {
      continue;
    }
    if (count < 4) {
      s4[count] = sym;
    } else if (count > 4) {
      break;
    }
    count++;
  }

  int max_bits = 0;
  for (int rest = length - 1; rest; rest >>= 1) {
    ++max_bits;
  }

  if (count <= 1) {
    WriteBits(4, 1, storage_ix, storage);
    WriteBits(max_bits, s4[0], storage_ix, storage);
    return;
  }

  CreateHuffmanTree(histogram, length, 15, depth);
  ConvertBitDepthsToSymbols(depth, length, bits);

  if (count > 4) {
    StoreHuffmanTree(depth, length, storage_ix, storage);
    return;
  }
  StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
}
// Returns the position of the first element of v equal to value, or -1 when
// no element matches.
int IndexOf(const std::vector<int>& v, int value) {
  const std::vector<int>::const_iterator hit =
      std::find(v.begin(), v.end(), value);
  if (hit == v.end()) {
    return -1;
  }
  return static_cast<int>(hit - v.begin());
}
// Moves the element at position index to the front of *v; the elements that
// were in front of it each shift back by one. index must be a valid position.
void MoveToFront(std::vector<int>* v, int index) {
  // Rotating [0, index] left so that position index comes first does exactly
  // this, and leaves everything after index untouched.
  std::rotate(v->begin(), v->begin() + index, v->begin() + index + 1);
}
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
if (v.empty()) return v;
std::vector<int> mtf(*std::max_element(v.begin(), v.end()) + 1);
for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
std::vector<int> result(v.size());
for (int i = 0; i < v.size(); ++i) {
int index = IndexOf(mtf, v[i]);
result[i] = index;
MoveToFront(&mtf, index);
}
return result;
}
// Run-length codes the zeros of v_in into *v_out and *extra_bits (both are
// appended to, one entry per emitted symbol).
//
// *max_run_length_prefix is first lowered to Log2Floor(longest zero run) if
// that is smaller than its initial value; no prefix code larger than the
// resulting value is produced. A zero run of length L that fits is emitted as
// the prefix code Log2Floor(L) with L - (1 << prefix) as extra bits; longer
// runs are split into maximal pieces first. Non-zero values of v_in are
// emitted shifted up by the final *max_run_length_prefix, with 0 as their
// extra-bits entry.
void RunLengthCodeZeros(const std::vector<int>& v_in,
                        int* max_run_length_prefix,
                        std::vector<int>* v_out,
                        std::vector<int>* extra_bits) {
  const size_t total = v_in.size();

  // Pass 1: find the longest run of zeros.
  int longest_run = 0;
  int current_run = 0;
  for (size_t pos = 0; pos < total; ++pos) {
    if (v_in[pos] == 0) {
      ++current_run;
      longest_run = std::max(longest_run, current_run);
    } else {
      current_run = 0;
    }
  }
  const int needed_prefix = longest_run > 0 ? Log2Floor(longest_run) : 0;
  *max_run_length_prefix = std::min(needed_prefix, *max_run_length_prefix);

  const int prefix_limit = *max_run_length_prefix;
  // Longest run a single symbol with the maximal prefix can describe.
  const int full_run = (2 << prefix_limit) - 1;

  // Pass 2: emit the symbols.
  size_t pos = 0;
  while (pos < total) {
    if (v_in[pos] != 0) {
      v_out->push_back(v_in[pos] + prefix_limit);
      extra_bits->push_back(0);
      ++pos;
      continue;
    }

    int reps = 0;
    while (pos < total && v_in[pos] == 0) {
      ++reps;
      ++pos;
    }

    // Peel off maximal pieces while the run does not fit into one symbol.
    while (reps > full_run) {
      v_out->push_back(prefix_limit);
      extra_bits->push_back((1 << prefix_limit) - 1);
      reps -= full_run;
    }
    const int run_length_prefix = Log2Floor(reps);
    v_out->push_back(run_length_prefix);
    extra_bits->push_back(reps - (1 << run_length_prefix));
  }
}
// Tries the maximum zero-run-length prefixes 0..16 and returns the one for
// which run-length coding v, entropy coding the resulting symbols and adding
// the extra bits gives the smallest estimated bit cost. The search stops at
// the first candidate that RunLengthCodeZeros has to lower, i.e. once v has no
// zero run long enough to use it.
int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
  int best_max_prefix = 0;
  int lowest_cost = std::numeric_limits<int>::max();

  for (int candidate = 0; candidate <= 16; ++candidate) {
    std::vector<int> symbols;
    std::vector<int> symbol_extra_bits;
    int granted_prefix = candidate;
    RunLengthCodeZeros(v, &granted_prefix, &symbols, &symbol_extra_bits);
    if (granted_prefix < candidate) {
      break;
    }

    HistogramContextMap histogram;
    for (size_t k = 0; k < symbols.size(); ++k) {
      histogram.Add(symbols[k]);
    }

    int bit_cost = PopulationCost(histogram);
    // A non-zero prefix costs 4 more header bits.
    if (candidate > 0) {
      bit_cost += 4;
    }
    // Prefix code p carries p extra bits per occurrence.
    for (int p = 1; p <= candidate; ++p) {
      bit_cost += histogram.data_[p] * p;
    }

    if (bit_cost < lowest_cost) {
      lowest_cost = bit_cost;
      best_max_prefix = candidate;
    }
  }
  return best_max_prefix;
}
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage) {
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
if (num_clusters == 1) {
return;
}
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramContextMap symbol_histogram;
for (int i = 0; i < rle_symbols.size(); ++i) {
symbol_histogram.Add(rle_symbols[i]);
}
bool use_rle = max_run_length_prefix > 0;
WriteBits(1, use_rle, storage_ix, storage);
if (use_rle) {
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
}
EntropyCodeContextMap symbol_code;
memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_));
memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_));
BuildAndStoreHuffmanTree(symbol_histogram.data_,
num_clusters + max_run_length_prefix,
symbol_code.depth_, symbol_code.bits_,
storage_ix, storage);
for (int i = 0; i < rle_symbols.size(); ++i) {
WriteBits(symbol_code.depth_[rle_symbols[i]],
symbol_code.bits_[rle_symbols[i]],
storage_ix, storage);
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
}
}
WriteBits(1, 1, storage_ix, storage); // use move-to-front
}
// Writes the block switch command for block number block_ix: the block type
// code (omitted for the very first block), the block length prefix code and
// the extra bits of the block length.
void StoreBlockSwitch(const BlockSplitCode& code,
                      const int block_ix,
                      int* storage_ix,
                      uint8_t* storage) {
  const bool is_first_block = !(block_ix > 0);
  if (!is_first_block) {
    const int type_symbol = code.type_code[block_ix];
    WriteBits(code.type_depths[type_symbol], code.type_bits[type_symbol],
              storage_ix, storage);
  }

  const int length_symbol = code.length_prefix[block_ix];
  WriteBits(code.length_depths[length_symbol],
            code.length_bits[length_symbol],
            storage_ix, storage);
  WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
            storage_ix, storage);
}
// Fills *code from the given block types and lengths and stores the block
// split description: num_types - 1 as a variable-length value and, when there
// is more than one type, the Huffman codes for block types and block lengths
// followed by the switch command of the first block.
//
// Block type codes: 1 means "previous type + 1", 0 means "the type before the
// previous one", anything else is the type itself plus 2. The first block is
// not counted in the type histogram.
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
                                 const std::vector<int>& lengths,
                                 const int num_types,
                                 BlockSplitCode* code,
                                 int* storage_ix,
                                 uint8_t* storage) {
  const int num_blocks = types.size();
  const int type_alphabet_size = num_types + 2;
  const int length_alphabet_size = 26;

  // Per-block tables.
  code->type_code.resize(num_blocks);
  code->length_prefix.resize(num_blocks);
  code->length_nextra.resize(num_blocks);
  code->length_extra.resize(num_blocks);
  // Per-symbol tables.
  code->type_depths.resize(type_alphabet_size);
  code->type_bits.resize(type_alphabet_size);
  code->length_depths.resize(length_alphabet_size);
  code->length_bits.resize(length_alphabet_size);

  std::vector<int> type_histo(type_alphabet_size);
  std::vector<int> length_histo(length_alphabet_size);
  int last_type = 1;
  int second_last_type = 0;
  for (int block = 0; block < num_blocks; ++block) {
    const int type = types[block];
    int type_code;
    if (type == last_type + 1) {
      type_code = 1;
    } else if (type == second_last_type) {
      type_code = 0;
    } else {
      type_code = type + 2;
    }
    second_last_type = last_type;
    last_type = type;

    code->type_code[block] = type_code;
    if (block > 0) {
      ++type_histo[type_code];
    }
    GetBlockLengthPrefixCode(lengths[block],
                             &code->length_prefix[block],
                             &code->length_nextra[block],
                             &code->length_extra[block]);
    ++length_histo[code->length_prefix[block]];
  }

  StoreVarLenUint8(num_types - 1, storage_ix, storage);
  if (num_types <= 1) {
    return;
  }
  BuildAndStoreHuffmanTree(&type_histo[0], type_alphabet_size,
                           &code->type_depths[0], &code->type_bits[0],
                           storage_ix, storage);
  BuildAndStoreHuffmanTree(&length_histo[0], length_alphabet_size,
                           &code->length_depths[0], &code->length_bits[0],
                           storage_ix, storage);
  StoreBlockSwitch(*code, 0, storage_ix, storage);
}
// Stores a context map in which the histogram type is always the block type.
// After num_types - 1, and only when there is more than one type, it writes
// the RLE parameters, a Huffman code over the used symbols, and for every
// type its own symbol followed by one zero-run symbol (prefix code
// context_bits - 1 with all extra bits set). The stream ends with the IMTF
// bit set.
void StoreTrivialContextMap(int num_types,
                            int context_bits,
                            int* storage_ix,
                            uint8_t* storage) {
  StoreVarLenUint8(num_types - 1, storage_ix, storage);
  if (num_types <= 1) {
    return;
  }

  const int repeat_code = context_bits - 1;
  const int repeat_bits = (1 << repeat_code) - 1;
  const int alphabet_size = num_types + repeat_code;

  // Write RLEMAX.
  WriteBits(1, 1, storage_ix, storage);
  WriteBits(4, repeat_code - 1, storage_ix, storage);

  // Histogram: the repeat code occurs once per type, symbol 0 once, and every
  // symbol from context_bits upwards once.
  std::vector<int> histogram(alphabet_size);
  histogram[repeat_code] = num_types;
  histogram[0] = 1;
  for (int sym = context_bits; sym < alphabet_size; ++sym) {
    histogram[sym] = 1;
  }

  std::vector<uint8_t> depths(alphabet_size);
  std::vector<uint16_t> bits(alphabet_size);
  BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
                           &depths[0], &bits[0],
                           storage_ix, storage);

  for (int type = 0; type < num_types; ++type) {
    int code = 0;
    if (type != 0) {
      code = type + context_bits - 1;
    }
    WriteBits(depths[code], bits[code], storage_ix, storage);
    WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
    WriteBits(repeat_code, repeat_bits, storage_ix, storage);
  }

  // Write IMTF (inverse-move-to-front) bit.
  WriteBits(1, 1, storage_ix, storage);
}
// Handles the encoding of one block category (literal, command or distance):
// it owns the block switch code and the per-histogram entropy codes, and
// emits a block switch command whenever the current block runs out.
// The block type and block length vectors are held by reference and must
// outlive the encoder.
class BlockEncoder {
 public:
  BlockEncoder(int alphabet_size,
               int num_block_types,
               const std::vector<int>& block_types,
               const std::vector<int>& block_lengths)
      : alphabet_size_(alphabet_size),
        num_block_types_(num_block_types),
        block_types_(block_types),
        block_lengths_(block_lengths),
        block_ix_(0),
        block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
        entropy_ix_(0) {}

  // Builds the entropy codes of block types and block lengths and writes
  // them to the bit stream.
  void BuildAndStoreBlockSwitchEntropyCodes(int* storage_ix, uint8_t* storage) {
    BuildAndStoreBlockSplitCode(
        block_types_, block_lengths_, num_block_types_,
        &block_split_code_, storage_ix, storage);
  }

  // Builds one entropy code per histogram and writes them to the bit stream.
  // The code of histogram h occupies [h * alphabet_size_, (h + 1) *
  // alphabet_size_) in depths_ and bits_.
  template<int kSize>
  void BuildAndStoreEntropyCodes(
      const std::vector<Histogram<kSize> >& histograms,
      int* storage_ix, uint8_t* storage) {
    const size_t table_size = histograms.size() * alphabet_size_;
    depths_.resize(table_size);
    bits_.resize(table_size);
    for (size_t h = 0; h < histograms.size(); ++h) {
      const int offset = static_cast<int>(h) * alphabet_size_;
      BuildAndStoreHuffmanTree(&histograms[h].data_[0], alphabet_size_,
                               &depths_[offset], &bits_[offset],
                               storage_ix, storage);
    }
  }

  // Writes symbol with the entropy code of the current block type, switching
  // to the next block first if the current one is used up.
  void StoreSymbol(int symbol, int* storage_ix, uint8_t* storage) {
    if (block_len_ == 0) {
      entropy_ix_ = AdvanceToNextBlock(storage_ix, storage) * alphabet_size_;
    }
    --block_len_;
    const int code_ix = entropy_ix_ + symbol;
    WriteBits(depths_[code_ix], bits_[code_ix], storage_ix, storage);
  }

  // Writes symbol with the entropy code selected through context_map by the
  // current block type and the given context value, switching to the next
  // block first if the current one is used up.
  template<int kContextBits>
  void StoreSymbolWithContext(int symbol, int context,
                              const std::vector<int>& context_map,
                              int* storage_ix, uint8_t* storage) {
    if (block_len_ == 0) {
      entropy_ix_ = AdvanceToNextBlock(storage_ix, storage) << kContextBits;
    }
    --block_len_;
    const int histogram_ix = context_map[entropy_ix_ + context];
    const int code_ix = histogram_ix * alphabet_size_ + symbol;
    WriteBits(depths_[code_ix], bits_[code_ix], storage_ix, storage);
  }

 private:
  // Steps to the next block, loads its length, writes its block switch
  // command and returns its block type.
  int AdvanceToNextBlock(int* storage_ix, uint8_t* storage) {
    ++block_ix_;
    block_len_ = block_lengths_[block_ix_];
    StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
    return block_types_[block_ix_];
  }

  const int alphabet_size_;
  const int num_block_types_;
  const std::vector<int>& block_types_;
  const std::vector<int>& block_lengths_;
  BlockSplitCode block_split_code_;
  int block_ix_;
  int block_len_;
  int entropy_ix_;
  std::vector<uint8_t> depths_;
  std::vector<uint16_t> bits_;
};
// Rounds the bit position *storage_ix up to the next multiple of 8 and clears
// the byte that starts at the new position.
void JumpToByteBoundary(int* storage_ix, uint8_t* storage) {
  const int aligned_bit = (*storage_ix + 7) & ~7;
  *storage_ix = aligned_bit;
  storage[aligned_bit >> 3] = 0;
}
// Writes one complete compressed meta-block to the bit stream.
//
// The output consists of, in this order: the meta-block header, the block
// switch codes of the literal, command and distance categories, the distance
// parameters (postfix bits and direct distance codes), one literal context
// mode per literal block type, the literal and distance context maps, the
// entropy codes of all three categories, and finally the commands themselves
// with their literals and distances.
//
// input is addressed as a ring buffer: byte number pos is read from
// input[pos & mask], starting at start_pos. prev_byte and prev_byte2 are the
// two bytes preceding start_pos and seed the literal context computation.
// Returns false when the header cannot be stored (see
// StoreCompressedMetaBlockHeader), true otherwise. When is_last is set, the
// stream is padded to the next byte boundary at the end.
bool StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool is_last,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
int *storage_ix,
uint8_t *storage) {
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
return false;
}
if (length == 0) {
// Only the last meta-block can be empty, so jump to next byte.
JumpToByteBoundary(storage_ix, storage);
return true;
}
// Size of the distance alphabet: the short codes, the direct codes and
// 48 codes per postfix combination.
int num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48 << distance_postfix_bits);
// One encoder per block category; each keeps track of its own block
// boundaries while symbols are stored below.
BlockEncoder literal_enc(256,
mb.literal_split.num_types,
mb.literal_split.types,
mb.literal_split.lengths);
BlockEncoder command_enc(kNumCommandPrefixes,
mb.command_split.num_types,
mb.command_split.types,
mb.command_split.lengths);
BlockEncoder distance_enc(num_distance_codes,
mb.distance_split.num_types,
mb.distance_split.types,
mb.distance_split.lengths);
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
command_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
// Distance parameters: the postfix bit count in 2 bits, then the number of
// direct distance codes, scaled down by the postfix bits, in 4 bits.
WriteBits(2, distance_postfix_bits, storage_ix, storage);
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
storage_ix, storage);
// The same literal context mode is written for every literal block type.
for (int i = 0; i < mb.literal_split.num_types; ++i) {
WriteBits(2, literal_context_mode, storage_ix, storage);
}
// An empty context map means "histogram type == block type", which has a
// dedicated compact encoding.
if (mb.literal_context_map.empty()) {
StoreTrivialContextMap(mb.literal_histograms.size(), kLiteralContextBits,
storage_ix, storage);
} else {
EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
storage_ix, storage);
}
if (mb.distance_context_map.empty()) {
StoreTrivialContextMap(mb.distance_histograms.size(), kDistanceContextBits,
storage_ix, storage);
} else {
EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
storage_ix, storage);
}
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms,
storage_ix, storage);
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms,
storage_ix, storage);
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms,
storage_ix, storage);
// Emit the commands. pos tracks the input position of the next literal.
size_t pos = start_pos;
for (int i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
int cmd_code = cmd.cmd_prefix_;
// cmd_extra_ packs the number of extra bits above bit 48 and their value
// in the low 48 bits.
int lennumextra = cmd.cmd_extra_ >> 48;
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffULL;
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
if (mb.literal_context_map.empty()) {
// No context modelling: literals depend on the block type only.
for (int j = 0; j < cmd.insert_len_; j++) {
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
++pos;
}
} else {
// Context modelling: the context is derived from the two previous bytes,
// which are updated after every literal.
for (int j = 0; j < cmd.insert_len_; ++j) {
int context = Context(prev_byte, prev_byte2,
literal_context_mode);
int literal = input[pos & mask];
literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
literal, context, mb.literal_context_map, storage_ix, storage);
prev_byte2 = prev_byte;
prev_byte = literal;
++pos;
}
}
// Skip over the copied bytes and refresh the two previous bytes from the
// end of the copy.
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0) {
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
// A distance is written only for command prefixes >= 128.
// NOTE(review): presumably lower prefixes imply the distance -- confirm
// against the command prefix definitions.
if (cmd.cmd_prefix_ >= 128) {
int dist_code = cmd.dist_prefix_;
// dist_extra_ packs the number of extra bits above bit 24 and their value
// in the low 24 bits.
int distnumextra = cmd.dist_extra_ >> 24;
int distextra = cmd.dist_extra_ & 0xffffff;
if (mb.distance_context_map.empty()) {
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
} else {
int context = cmd.DistanceContext();
distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
dist_code, context, mb.distance_context_map, storage_ix, storage);
}
brotli::WriteBits(distnumextra, distextra, storage_ix, storage);
}
}
}
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
// Stores len bytes of input as an uncompressed meta-block (raw bytes as
// bytes). The bytes are read from the ring buffer input, starting at
// position & mask and wrapping around at mask + 1.
// Returns false when the meta-block header cannot be stored. When final_block
// is set, an empty last meta-block is appended, because the uncompressed
// block itself cannot be marked as the last one.
bool StoreUncompressedMetaBlock(bool final_block,
                                const uint8_t * __restrict input,
                                size_t position, size_t mask,
                                size_t len,
                                int * __restrict storage_ix,
                                uint8_t * __restrict storage) {
  if (!brotli::StoreUncompressedMetaBlockHeader(len, storage_ix, storage)) {
    return false;
  }
  JumpToByteBoundary(storage_ix, storage);

  const size_t ring_size = mask + 1;
  size_t src = position & mask;
  size_t remaining = len;
  if (src + remaining > ring_size) {
    // The data wraps around: copy the part up to the end of the ring first.
    const size_t head = ring_size - src;
    memcpy(&storage[*storage_ix >> 3], &input[src], head);
    *storage_ix += head << 3;
    remaining -= head;
    src = 0;
  }
  memcpy(&storage[*storage_ix >> 3], &input[src], remaining);
  *storage_ix += remaining << 3;

  // We need to clear the next 4 bytes to continue to be
  // compatible with WriteBits.
  brotli::WriteBitsPrepareStorage(*storage_ix, storage);

  // Since the uncompressed block itself may not be the final block, add an
  // empty one after this.
  if (final_block) {
    brotli::WriteBits(1, 1, storage_ix, storage);  // islast
    brotli::WriteBits(1, 1, storage_ix, storage);  // isempty
    JumpToByteBoundary(storage_ix, storage);
  }
  return true;
}
// Writes an empty metadata meta-block and then advances to the next byte
// boundary.
void StoreSyncMetaBlock(int * __restrict storage_ix,
                        uint8_t * __restrict storage) {
  // Six header bits, listed in the order they are written:
  //   1 bit:  is_last (0)
  //   2 bits: num nibbles (3)
  //   1 bit:  reserved (0)
  //   2 bits: metadata length bytes (0)
  const int kHeaderBitCount = 6;
  const int kEmptyMetadataHeader = 6;
  WriteBits(kHeaderBitCount, kEmptyMetadataHeader, storage_ix, storage);
  JumpToByteBoundary(storage_ix, storage);
}
} // namespace brotli

View File

@ -0,0 +1,137 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to convert brotli-related data structures into the
// brotli bit stream. The functions here operate under the
// assumption that there is enough space in the storage, i.e., there are
// no out-of-range checks anywhere.
//
// These functions do bit addressing into a byte array. The byte array
// is called "storage" and the index to the bit is called storage_ix
// in function arguments.
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "./metablock.h"
namespace brotli {
// All Store functions here will use a storage_ix, which is always the bit
// position for the current storage.
// Stores a number between 0 and 255.
// Zero is written as a single 0 bit. Any other n is written as a 1 bit, a
// 3-bit field holding Log2Floor(n), and then that many bits holding
// n - (1 << Log2Floor(n)).
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage);
// Stores the compressed meta-block header.
// Writes the ISLAST bit, the ISEMPTY bit (last meta-block only), the encoded
// length, and a cleared ISUNCOMPRESSED bit (non-last meta-blocks only).
// Returns false when a non-last meta-block has length 0 or when the length
// cannot be encoded; bits written before the failure stay in the stream.
bool StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
int* storage_ix,
uint8_t* storage);
// Stores the uncompressed meta-block header.
// Writes a cleared ISLAST bit (an uncompressed block cannot be the last one),
// the encoded length and a set ISUNCOMPRESSED bit. Returns false when the
// length cannot be encoded; the ISLAST bit has already been written then.
bool StoreUncompressedMetaBlockHeader(size_t length,
int* storage_ix,
uint8_t* storage);
// Stores a context map where the histogram type is always the block type.
// num_types - 1 is always written; the rest of the map is written only when
// num_types > 1. context_bits determines the zero-run code used between the
// entries (prefix code context_bits - 1).
void StoreTrivialContextMap(int num_types,
int context_bits,
int* storage_ix,
uint8_t* storage);
// Stores the bit depths of the code-length code (code_length_bitdepth, indexed
// by code-length symbol) in a fixed storage order. A 2-bit field first tells
// how many leading entries (0, 2 or 3) are skipped because they are zero;
// trailing zero entries are dropped only when num_codes > 1.
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
int *storage_ix,
uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
// When at most one symbol has a non-zero count, only that symbol is stored
// and depth and bits are left untouched. Two to four used symbols are stored
// as a simple Huffman code; more than four as a complex one.
void BuildAndStoreHuffmanTree(const int *histogram,
const int length,
uint8_t* depth,
uint16_t* bits,
int* storage_ix,
uint8_t* storage);
// Encodes the given context map to the bit stream. The number of different
// histogram ids is given by num_clusters.
// With a single cluster only the cluster count is written. Otherwise the map
// is move-to-front transformed and its zero runs are run-length coded before
// it is entropy coded.
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
// switch command.
struct BlockSplitCode {
// Per block: the block type code (1 = previous type + 1, 0 = the type before
// the previous one, otherwise the type + 2).
std::vector<int> type_code;
// Per block: prefix code of the block length, the number of extra bits and
// the value of those extra bits, as returned by GetBlockLengthPrefixCode.
std::vector<int> length_prefix;
std::vector<int> length_nextra;
std::vector<int> length_extra;
// Huffman code over the block type codes (num_types + 2 entries).
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
// Huffman code over the block length prefix codes (26 entries).
std::vector<uint8_t> length_depths;
std::vector<uint16_t> length_bits;
};
// Builds a BlockSplitCode data structure from the block split given by the
// vector of block types and block lengths and stores it to the bit stream.
// num_types - 1 is always written; the Huffman codes and the switch command
// of the first block are written only when num_types > 1.
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
const std::vector<int>& lengths,
const int num_types,
BlockSplitCode* code,
int* storage_ix,
uint8_t* storage);
// Stores the block switch command with index block_ix to the bit stream.
// The block type code is omitted for block_ix == 0; the block length prefix
// code and its extra bits are always written.
void StoreBlockSwitch(const BlockSplitCode& code,
const int block_ix,
int* storage_ix,
uint8_t* storage);
// Stores one compressed meta-block: the header, the block split codes, the
// distance parameters, the literal context modes, the context maps, the
// entropy codes and then all n_commands commands with their literals and
// distances.
// input is read as a ring buffer (input[pos & mask]) starting at start_pos;
// prev_byte and prev_byte2 are the two bytes before start_pos and seed the
// literal context. Returns false when the header cannot be stored. When
// final_block is set, the output is padded to a byte boundary.
bool StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool final_block,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
int *storage_ix,
uint8_t *storage);
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
// len bytes are copied from the ring buffer input, starting at
// position & mask and wrapping around at mask + 1. Returns false when the
// header cannot be stored. When final_block is set, an empty last meta-block
// is appended after the data.
bool StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
size_t len,
int* storage_ix,
uint8_t* storage);
// Stores an empty metadata meta-block and syncs to a byte boundary.
// The header is six bits: is_last (0), num nibbles (3), a reserved bit (0) and
// the number of metadata length bytes (0).
void StoreSyncMetaBlock(int* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_

View File

@ -0,0 +1,305 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for clustering similar histograms together.
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <algorithm>
#include <complex>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
// A candidate merge of two histograms, kept in the heap used by
// HistogramCombine.
struct HistogramPair {
// Indices of the two histograms; CompareAndPushToHeap stores them so that
// idx1 < idx2.
int idx1;
int idx2;
// Cleared by HistogramCombine once either histogram has been merged away.
bool valid;
// Bit cost of the combined histogram.
double cost_combo;
// Change in total cost if the pair is merged: cost_combo minus the bit costs
// of both histograms, plus half the context map cost difference.
double cost_diff;
};
// Heap ordering for HistogramPair: a pair compares "less" when its cost_diff
// is larger, so that the pair with the smallest cost_diff ends up on top of
// the heap. Equal cost_diff values are ordered by index distance, the larger
// distance comparing "less".
struct HistogramPairComparator {
  bool operator()(const HistogramPair& p1, const HistogramPair& p2) const {
    if (p1.cost_diff == p2.cost_diff) {
      return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
    }
    return p1.cost_diff > p2.cost_diff;
  }
};
// Returns the entropy reduction of the context map when two clusters of
// size_a and size_b entries are combined into one.
inline double ClusterCostDiff(int size_a, int size_b) {
  const int combined_size = size_a + size_b;
  const double term_a = size_a * FastLog2(size_a);
  const double term_b = size_b * FastLog2(size_b);
  const double term_combined = combined_size * FastLog2(combined_size);
  return term_a + term_b - term_combined;
}
// Evaluates merging out[idx1] with out[idx2] and, if the merge is promising,
// pushes the pair onto the *pairs heap.
//
// A pair is always stored when one of the two histograms is empty (the
// combined cost is then simply the cost of the other one). Otherwise the
// combined histogram is built and the pair is stored only if its resulting
// cost_diff stays below a threshold: the cost_diff of the current heap top
// (but at least 0), or unlimited when the heap is empty. Equal indices are
// ignored.
template<typename HistogramType>
void CompareAndPushToHeap(const HistogramType* out,
                          const int* cluster_size,
                          int idx1, int idx2,
                          std::vector<HistogramPair>* pairs) {
  if (idx1 == idx2) {
    return;
  }
  // Normalize so that idx1 < idx2.
  if (idx2 < idx1) {
    std::swap(idx1, idx2);
  }

  HistogramPair p;
  p.idx1 = idx1;
  p.idx2 = idx2;
  p.valid = true;
  p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
  p.cost_diff -= out[idx1].bit_cost_;
  p.cost_diff -= out[idx2].bit_cost_;

  bool store_pair = true;
  if (out[idx1].total_count_ == 0) {
    p.cost_combo = out[idx2].bit_cost_;
  } else if (out[idx2].total_count_ == 0) {
    p.cost_combo = out[idx1].bit_cost_;
  } else {
    const double threshold =
        pairs->empty() ? 1e99 : std::max(0.0, (*pairs)[0].cost_diff);
    HistogramType combo = out[idx1];
    combo.AddHistogram(out[idx2]);
    const double cost_combo = PopulationCost(combo);
    if (cost_combo < threshold - p.cost_diff) {
      p.cost_combo = cost_combo;
    } else {
      store_pair = false;
    }
  }
  if (!store_pair) {
    return;
  }

  p.cost_diff += p.cost_combo;
  pairs->push_back(p);
  std::push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
}
// Greedily merges the histograms referenced by symbols[0..symbols_size).
//
// out holds the histograms and cluster_size the number of entries assigned to
// each of them; both are indexed by the values found in symbols. In every
// step the pair with the smallest cost_diff is merged into its lower index:
// the histogram, bit cost and cluster size of idx1 absorb those of idx2, and
// every occurrence of idx2 in symbols is rewritten to idx1.
//
// Merging continues unconditionally while it reduces the cost (cost_diff
// below 0). Once the best pair no longer does, merging only continues until
// at most max_clusters clusters are left.
template<typename HistogramType>
void HistogramCombine(HistogramType* out,
int* cluster_size,
int* symbols,
int symbols_size,
int max_clusters) {
double cost_diff_threshold = 0.0;
int min_cluster_size = 1;
// Collect the distinct symbols in order of first appearance.
std::set<int> all_symbols;
std::vector<int> clusters;
for (int i = 0; i < symbols_size; ++i) {
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
all_symbols.insert(symbols[i]);
clusters.push_back(symbols[i]);
}
}
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
}
}
while (clusters.size() > min_cluster_size) {
// The best remaining merge no longer pays off: from now on only merge as
// far as needed to get down to max_clusters.
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
// Take the best pair from the top of heap.
int best_idx1 = pairs[0].idx1;
int best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (int i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
// Remove best_idx2 from the cluster list by shifting the following entries
// down by one.
// NOTE(review): this shift only removes exactly best_idx2 if clusters is in
// increasing order -- confirm that callers guarantee this.
for (int i = 0; i + 1 < clusters.size(); ++i) {
if (clusters[i] >= best_idx2) {
clusters[i] = clusters[i + 1];
}
}
clusters.pop_back();
// Invalidate pairs intersecting the just combined best pair.
for (int i = 0; i < pairs.size(); ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
p.valid = false;
}
}
// Pop invalid pairs from the top of the heap.
while (!pairs.empty() && !pairs[0].valid) {
std::pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
pairs.pop_back();
}
// Push new pairs formed with the combined histogram to the heap.
for (int i = 0; i < clusters.size(); ++i) {
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
}
}
}
// -----------------------------------------------------------------------------
// Histogram refinement
// Bit cost increase of folding 'histogram' into 'candidate': the population
// cost of the combined histogram minus candidate's stored bit cost. An empty
// 'histogram' costs nothing to move.
template<typename HistogramType>
double HistogramBitCostDistance(const HistogramType& histogram,
                                const HistogramType& candidate) {
  if (histogram.total_count_ != 0) {
    HistogramType combined = histogram;
    combined.AddHistogram(candidate);
    return PopulationCost(combined) - candidate.bit_cost_;
  }
  return 0.0;
}
// Reassigns every 'in' histogram to the cheapest 'out' histogram and then
// rebuilds the 'out' histograms from the new assignment.
// Note: out[]->bit_cost_ is assumed to be up-to-date on entry.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, int in_size,
                    HistogramType* out, int* symbols) {
  typedef std::set<int>::const_iterator SymbolIter;

  // Candidate targets: every cluster id currently in use.
  std::set<int> candidates;
  for (int n = 0; n < in_size; ++n) {
    candidates.insert(symbols[n]);
  }

  for (int n = 0; n < in_size; ++n) {
    // Start from the previous entry's (already remapped) choice so that ties
    // favour keeping neighbouring entries together.
    int winner = (n == 0) ? symbols[0] : symbols[n - 1];
    double winner_bits = HistogramBitCostDistance(in[n], out[winner]);
    for (SymbolIter it = candidates.begin(); it != candidates.end(); ++it) {
      const double bits = HistogramBitCostDistance(in[n], out[*it]);
      if (bits < winner_bits) {
        winner_bits = bits;
        winner = *it;
      }
    }
    symbols[n] = winner;
  }

  // Recompute each out histogram from the raw inputs and the new symbols.
  for (SymbolIter it = candidates.begin(); it != candidates.end(); ++it) {
    out[*it].Clear();
  }
  for (int n = 0; n < in_size; ++n) {
    out[symbols[n]].AddHistogram(in[n]);
  }
}
// Renumbers the histograms in *out in order of first use by *symbols, so the
// rewritten symbols introduce new ids in increasing order (0, 1, 2, ...).
// Histograms that no symbol refers to are dropped.
template<typename HistogramType>
void HistogramReindex(std::vector<HistogramType>* out,
                      std::vector<int>* symbols) {
  // Work from a copy because *out is overwritten while it is being read.
  const std::vector<HistogramType> snapshot(*out);
  std::map<int, int> remap;
  int assigned = 0;
  for (std::vector<int>::const_iterator it = symbols->begin();
       it != symbols->end(); ++it) {
    const int old_id = *it;
    if (remap.count(old_id) != 0) {
      continue;
    }
    remap[old_id] = assigned;
    (*out)[assigned] = snapshot[old_id];
    ++assigned;
  }
  out->resize(assigned);
  for (std::vector<int>::iterator it = symbols->begin();
       it != symbols->end(); ++it) {
    *it = remap[*it];
  }
}
// Trivial clustering: one output histogram per block, the sum of that
// block's num_contexts input histograms. One symbol (the block index) is
// appended to *histogram_symbols per input histogram. max_histograms is not
// used.
template<typename HistogramType>
void ClusterHistogramsTrivial(const std::vector<HistogramType>& in,
                              int num_contexts, int num_blocks,
                              int max_histograms,
                              std::vector<HistogramType>* out,
                              std::vector<int>* histogram_symbols) {
  out->resize(num_blocks);
  for (int block = 0; block < num_blocks; ++block) {
    HistogramType& merged = (*out)[block];
    merged.Clear();
    const int first = block * num_contexts;
    for (int ctx = 0; ctx < num_contexts; ++ctx) {
      merged.AddHistogram(in[first + ctx]);
      histogram_symbols->push_back(block);
    }
  }
}
// Clusters similar histograms of 'in' together. The selected histograms end
// up in 'out', and for each index of 'in', *histogram_symbols names the 'out'
// histogram that approximates it best.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
                       int num_contexts, int num_blocks,
                       int max_histograms,
                       std::vector<HistogramType>* out,
                       std::vector<int>* histogram_symbols) {
  const int total = num_contexts * num_blocks;
  std::vector<int> cluster_size(total, 1);
  out->resize(total);
  histogram_symbols->resize(total);
  // Start with every input histogram in its own cluster.
  for (int idx = 0; idx < total; ++idx) {
    HistogramType& slot = (*out)[idx];
    slot = in[idx];
    slot.bit_cost_ = PopulationCost(in[idx]);
    (*histogram_symbols)[idx] = idx;
  }

  // Neither vector is resized again until HistogramReindex, so these raw
  // pointers stay valid for the calls below.
  HistogramType* const histograms = &(*out)[0];
  int* const sizes = &cluster_size[0];
  int* const symbols = &(*histogram_symbols)[0];

  // First collapse similar histograms within each block type ...
  if (num_contexts > 1) {
    for (int block = 0; block < num_blocks; ++block) {
      HistogramCombine(histograms, sizes,
                       symbols + block * num_contexts, num_contexts,
                       max_histograms);
    }
  }
  // ... then across all of them.
  HistogramCombine(histograms, sizes, symbols, total, max_histograms);

  // Find the optimal map from the original histograms to the final ones.
  HistogramRemap(&in[0], total, histograms, symbols);

  // Convert the context map to a canonical form.
  HistogramReindex(out, histogram_symbols);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_

View File

@ -0,0 +1,150 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This class models a sequence of literals and a backward reference copy.
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
#include <stdint.h>
#include "./fast_log.h"
namespace brotli {
// Splits a 1-based distance code into a prefix code and its extra bits.
// Codes 1..16 map directly to prefixes 0..15 with no extra bits. For larger
// codes, *extra holds the extra-bit count in its top 8 bits and the extra-bit
// value in its low 24 bits.
static inline void GetDistCode(int distance_code,
                               uint16_t* code, uint32_t* extra) {
  const int zero_based = distance_code - 1;
  if (zero_based < 16) {
    *code = zero_based;
    *extra = 0;
    return;
  }
  const int value = zero_based - 12;
  const int numextra = Log2FloorNonZero(value) - 1;
  const int prefix = value >> numextra;
  *code = 12 + 2 * numextra + prefix;
  *extra = (numextra << 24) | (value - (prefix << numextra));
}
// Lookup tables indexed by insert / copy length code (24 entries each), as
// consumed by GetLengthCode():
//   insbase[c]   - value subtracted from the insert length to obtain the
//                  extra-bits value for insert length code c.
//   insextra[c]  - number of extra bits for insert length code c.
//   copybase[c]  - value subtracted from the copy length to obtain the
//                  extra-bits value for copy length code c.
//   copyextra[c] - number of extra bits for copy length code c.
static int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66,
98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
5, 6, 7, 8, 9, 10, 12, 14, 24 };
static int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38,
54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 7, 8, 9, 10, 24 };
// Maps an insert length to its insert length code (0..23).
static inline int GetInsertLengthCode(int insertlen) {
  if (insertlen < 6) {
    // Lengths 0..5 are their own code.
    return insertlen;
  }
  if (insertlen < 130) {
    const int shifted = insertlen - 2;
    const int nbits = Log2FloorNonZero(shifted) - 1;
    return (nbits << 1) + (shifted >> nbits) + 2;
  }
  if (insertlen < 2114) {
    return Log2FloorNonZero(insertlen - 66) + 10;
  }
  if (insertlen < 6210) {
    return 21;
  }
  return insertlen < 22594 ? 22 : 23;
}
// Maps a copy length to its copy length code; lengths 2..9 map to codes 0..7
// and everything from 2118 upwards maps to code 23.
static inline int GetCopyLengthCode(int copylen) {
  if (copylen < 10) {
    return copylen - 2;
  }
  if (copylen < 134) {
    const int shifted = copylen - 6;
    const int nbits = Log2FloorNonZero(shifted) - 1;
    return (nbits << 1) + (shifted >> nbits) + 4;
  }
  if (copylen < 2118) {
    return Log2FloorNonZero(copylen - 70) + 12;
  }
  return 23;
}
// Combines an insert length code and a copy length code into a single
// insert-and-copy command code. The low six bits always carry the low three
// bits of each code; the upper bits select the cell of the (insert, copy)
// code-range grid. When distancecode is 0 and both codes are small enough,
// one of the two short cells (result < 128) is used instead.
static inline int CombineLengthCodes(
    int inscode, int copycode, int distancecode) {
  const int low_bits = ((inscode & 0x7u) << 3) | (copycode & 0x7u);
  const bool short_form =
      distancecode == 0 && inscode < 8 && copycode < 16;
  if (!short_form) {
    // "To convert an insert-and-copy length code to an insert length code and
    // a copy length code, the following table can be used"
    static const int kCells[9] = { 2, 3, 6, 4, 5, 8, 7, 9, 10 };
    const int cell = kCells[3 * (inscode >> 3) + (copycode >> 3)];
    return (cell << 6) | low_bits;
  }
  if (copycode < 8) {
    return low_bits;
  }
  return low_bits | 64;
}
static inline void GetLengthCode(int insertlen, int copylen, int distancecode,
uint16_t* code, uint64_t* extra) {
int inscode = GetInsertLengthCode(insertlen);
int copycode = GetCopyLengthCode(copylen);
uint64_t insnumextra = insextra[inscode];
uint64_t numextra = insnumextra + copyextra[copycode];
uint64_t insextraval = insertlen - insbase[inscode];
uint64_t copyextraval = copylen - copybase[copycode];
*code = CombineLengthCodes(inscode, copycode, distancecode);
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
}
// One compressor command: a run of literals to insert followed by a backward
// reference copy, stored together with its prefix codes and extra bits.
struct Command {
  // Leaves every field uninitialized.
  Command() {}

  // Insert-and-copy command. The length prefix is derived from copylen_code,
  // while copy_len_ records copylen itself.
  Command(int insertlen, int copylen, int copylen_code, int distance_code)
      : insert_len_(insertlen), copy_len_(copylen) {
    GetDistCode(distance_code, &dist_prefix_, &dist_extra_);
    GetLengthCode(insertlen, copylen_code, dist_prefix_,
                  &cmd_prefix_, &cmd_extra_);
  }

  // Insert-only command: no copy, distance prefix fixed to 16 and the length
  // prefix computed as if the copy length were 4.
  Command(int insertlen)
      : insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) {
    GetLengthCode(insertlen, 4, dist_prefix_, &cmd_prefix_, &cmd_extra_);
  }

  // Reconstructs the 1-based distance code from dist_prefix_ / dist_extra_
  // (the inverse of GetDistCode).
  int DistanceCode() const {
    if (dist_prefix_ >= 16) {
      const int nbits = dist_extra_ >> 24;
      const int extra = dist_extra_ & 0xffffff;
      const int prefix = dist_prefix_ - 12 - 2 * nbits;
      return (prefix << nbits) + extra + 13;
    }
    return dist_prefix_ + 1;
  }

  // Distance context (0..3) derived from the command prefix: for cells 0, 2,
  // 4 and 7 with a low three-bit value of at most 2 it is that value,
  // otherwise 3.
  int DistanceContext() const {
    const int cell = cmd_prefix_ >> 6;
    const int low = cmd_prefix_ & 7;
    switch (cell) {
      case 0:
      case 2:
      case 4:
      case 7:
        if (low <= 2) {
          return low;
        }
        break;
      default:
        break;
    }
    return 3;
  }

  int insert_len_;
  int copy_len_;
  uint16_t cmd_prefix_;
  uint16_t dist_prefix_;
  uint64_t cmd_extra_;
  uint32_t dist_extra_;
};
} // namespace brotli
#endif // BROTLI_ENC_COMMAND_H_

View File

@ -0,0 +1,185 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to map previous bytes into a context id.
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
#include <stdint.h>
namespace brotli {
// Second-order context lookup table for UTF8 byte streams.
//
// If p1 and p2 are the previous two bytes, we calculate the context as
//
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
//
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
// equivalent to
//
// context = 4 * context1(p1) + context2(p2),
//
// where context1 is based on the previous byte in the following way:
//
// 0 : non-ASCII control
// 1 : \t, \n, \r
// 2 : space
// 3 : other punctuation
// 4 : " '
// 5 : %
// 6 : ( < [ {
// 7 : ) > ] }
// 8 : , ; :
// 9 : .
// 10 : =
// 11 : number
// 12 : upper-case vowel
// 13 : upper-case consonant
// 14 : lower-case vowel
// 15 : lower-case consonant
//
// and context2 is based on the second last byte:
//
// 0 : control, space
// 1 : punctuation
// 2 : upper-case letter, number
// 3 : lower-case letter
//
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
// stream it will be a continuation byte, value between 128 and 191), the
// context is the same as if the second last byte was an ASCII control or space.
//
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
// the last byte and to a lesser extent on the second last byte if it is ASCII.
//
// If the last byte is a UTF8 continuation byte, the second last byte can be:
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
// - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1
// - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3
//
// The possible value combinations of the previous two bytes, the range of
// context ids and the type of the next byte is summarized in the table below:
//
// |--------\-----------------------------------------------------------------|
// | \ Last byte |
// | Second \---------------------------------------------------------------|
// | last byte \ ASCII | cont. byte | lead byte |
// | \ (0-127) | (128-191) | (192-) |
// |=============|===================|=====================|==================|
// | ASCII | next: ASCII/lead | not valid | next: cont. |
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: ASCII/lead | not valid |
// | (192-223) | | context: 0 - 1 | |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: cont. | not valid |
// | (224-) | | context: 2 - 3 | |
// |-------------|-------------------|---------------------|------------------|
// Two 256-entry halves: entries [0, 256) are indexed by the last byte (p1),
// entries [256, 512) by the second last byte (p2) plus 256. Each row below
// holds 16 consecutive byte values.
static const uint8_t kUTF8ContextLookup[512] = {
// Last byte.
//
// ASCII range (0-127).
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
// UTF8 continuation byte range (128-191): context is the LSB of the byte.
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
// UTF8 lead byte range (192-255): context is 2 plus the LSB of the byte.
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
// Second last byte.
//
// ASCII range (0-127).
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
// Bytes 128-207: UTF8 continuation bytes (128-191, four rows) followed by
// one row of lead bytes (192-207); all map to 0.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Remaining lead bytes: 208-223 map to 0, 224-255 map to 2.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
// Context lookup table for small signed integers.
// Maps a byte value to a 3-bit bucket (16 byte values per row):
//   0 -> 0, 1..15 -> 1, 16..63 -> 2, 64..127 -> 3,
//   128..191 -> 4, 192..239 -> 5, 240..254 -> 6, 255 -> 7.
static const uint8_t kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
// Literal context modes understood by Context().
enum ContextType {
// Context is the low six bits of the last byte.
CONTEXT_LSB6 = 0,
// Context is the high six bits of the last byte.
CONTEXT_MSB6 = 1,
// Context is looked up in kUTF8ContextLookup from the last two bytes.
CONTEXT_UTF8 = 2,
// Context is built from kSigned3BitContextLookup of the last two bytes.
CONTEXT_SIGNED = 3
};
// Computes the context id for the next literal from the previous byte (p1)
// and the byte before it (p2), according to the given ContextType mode.
// Unknown modes yield context 0.
static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
  if (mode == CONTEXT_LSB6) {
    return p1 & 0x3f;
  }
  if (mode == CONTEXT_MSB6) {
    return p1 >> 2;
  }
  if (mode == CONTEXT_UTF8) {
    return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
  }
  if (mode == CONTEXT_SIGNED) {
    return (kSigned3BitContextLookup[p1] << 3) + kSigned3BitContextLookup[p2];
  }
  return 0;
}
} // namespace brotli
#endif // BROTLI_ENC_CONTEXT_H_

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,589 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implementation of Brotli compressor.
#include "./encode.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
#include "./cluster.h"
#include "./context.h"
#include "./metablock.h"
#include "./transform.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
#include "./literal_cost.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
// Fraction of a block's bytes that must parse as UTF8 (see IsMostlyUTF8)
// before WriteBrotliData treats the block as UTF8 text.
static const double kMinUTF8Ratio = 0.75;
// Decodes one symbol from the start of input[0..size).
//
// On a well-formed, non-overlong UTF8 sequence of 1 to 4 bytes, stores the
// code point in *symbol and returns the sequence length. Anything else
// (including a NUL byte) is reported as the special symbol
// 0x110000 | input[0], which lies above the UTF8 code space, with a return
// value of 1. input[0] is always read, so size must be at least 1.
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
  const int lead = input[0];

  // ASCII (NUL is deliberately not accepted here).
  if ((lead & 0x80) == 0 && lead > 0) {
    *symbol = lead;
    return 1;
  }

  // 2-byte UTF8
  if (size > 1 &&
      (lead & 0xe0) == 0xc0 &&
      (input[1] & 0xc0) == 0x80) {
    const int code_point = ((lead & 0x1f) << 6) | (input[1] & 0x3f);
    if (code_point > 0x7f) {
      *symbol = code_point;
      return 2;
    }
  }

  // 3-byte UTF8
  if (size > 2 &&
      (lead & 0xf0) == 0xe0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80) {
    const int code_point = ((lead & 0x0f) << 12) |
                           ((input[1] & 0x3f) << 6) |
                           (input[2] & 0x3f);
    if (code_point > 0x7ff) {
      *symbol = code_point;
      return 3;
    }
  }

  // 4-byte UTF8
  if (size > 3 &&
      (lead & 0xf8) == 0xf0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80 &&
      (input[3] & 0xc0) == 0x80) {
    const int code_point = ((lead & 0x07) << 18) |
                           ((input[1] & 0x3f) << 12) |
                           ((input[2] & 0x3f) << 6) |
                           (input[3] & 0x3f);
    if (code_point > 0xffff && code_point <= 0x10ffff) {
      *symbol = code_point;
      return 4;
    }
  }

  // Not UTF8, emit a special symbol above the UTF8-code space
  *symbol = 0x110000 | lead;
  return 1;
}
// Returns true if more than min_fraction of the bytes in data[0..length)
// belong to sequences that ParseAsUTF8 accepts as UTF8.
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
  size_t utf8_bytes = 0;
  for (size_t offset = 0; offset < length; ) {
    int code_point;
    const int consumed = ParseAsUTF8(&code_point, data + offset,
                                     length - offset);
    offset += consumed;
    // Symbols at or above 0x110000 mark bytes that failed to parse.
    if (code_point < 0x110000) {
      utf8_bytes += consumed;
    }
  }
  return utf8_bytes > min_fraction * length;
}
// Re-encodes the distance prefix and extra bits of every command that has a
// copy part and a command prefix of at least 128, using the given number of
// direct distance codes and postfix bits. Nothing is done when both
// parameters are zero.
void RecomputeDistancePrefixes(Command* cmds,
                               size_t num_commands,
                               int num_direct_distance_codes,
                               int distance_postfix_bits) {
  const bool default_params =
      num_direct_distance_codes == 0 && distance_postfix_bits == 0;
  if (default_params) {
    return;
  }
  for (size_t n = 0; n < num_commands; ++n) {
    Command& cmd = cmds[n];
    if (cmd.copy_len_ <= 0 || cmd.cmd_prefix_ < 128) {
      continue;
    }
    PrefixEncodeCopyDistance(cmd.DistanceCode(),
                             num_direct_distance_codes,
                             distance_postfix_bits,
                             &cmd.dist_prefix_,
                             &cmd.dist_extra_);
  }
}
// Returns a scratch output buffer of at least 'size' bytes. The buffer is
// reallocated only when it has to grow; the old contents are not preserved.
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
  const bool must_grow = storage_size_ < size;
  if (must_grow) {
    storage_.reset(new uint8_t[size]);
    storage_size_ = size;
  }
  return &storage_[0];
}
// Builds a compressor from 'params'. The parameters are sanitized first
// (quality, window size and block size are clamped; the expensive modelling
// features are disabled for quality <= 9), then the ring buffer, the optional
// literal cost buffer, the command buffer, the stream header bits, the
// distance cache and the hashers are set up.
BrotliCompressor::BrotliCompressor(BrotliParams params)
    : params_(params),
      hashers_(new Hashers()),
      input_pos_(0),
      num_commands_(0),
      last_insert_len_(0),
      last_flush_pos_(0),
      last_processed_pos_(0),
      prev_byte_(0),
      prev_byte2_(0),
      storage_size_(0) {
  // Sanitize params.
  params_.quality = std::max(0, params_.quality);
  if (params_.lgwin < kMinWindowBits) {
    params_.lgwin = kMinWindowBits;
  } else if (params_.lgwin > kMaxWindowBits) {
    params_.lgwin = kMaxWindowBits;
  }
  if (params_.lgblock == 0) {
    // No explicit block size: default to 16 bits, and for quality >= 9 grow
    // it up to the window size (capped at 21 bits).
    params_.lgblock = 16;
    if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
      params_.lgblock = std::min(21, params_.lgwin);
    }
  } else {
    params_.lgblock = std::min(kMaxInputBlockBits,
                               std::max(kMinInputBlockBits, params_.lgblock));
  }
  if (params_.quality <= 9) {
    params_.enable_dictionary = false;
    params_.enable_transforms = false;
    params_.greedy_block_split = true;
    params_.enable_context_modeling = false;
  }

  // Set maximum distance, see section 9.1. of the spec.
  max_backward_distance_ = (1 << params_.lgwin) - 16;

  // Initialize input and literal cost ring buffers.
  // We allocate at least lgwin + 1 bits for the ring buffer so that the newly
  // added block fits there completely and we still get lgwin bits and at least
  // read_block_size_bits + 1 bits because the copy tail length needs to be
  // smaller than ringbuffer size.
  int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
  ringbuffer_.reset(new RingBuffer(ringbuffer_bits, params_.lgblock));
  if (params_.quality > 9) {
    literal_cost_mask_ = (1 << params_.lgblock) - 1;
    literal_cost_.reset(new float[literal_cost_mask_ + 1]);
  } else {
    // No literal cost buffer is allocated at this quality, but
    // WriteBrotliData still passes literal_cost_mask_ by value to
    // CreateBackwardReferences; give it a defined value so no indeterminate
    // member is read.
    literal_cost_mask_ = 0;
  }

  // Allocate command buffer.
  cmd_buffer_size_ = std::max(1 << 18, 1 << params_.lgblock);
  commands_.reset(new brotli::Command[cmd_buffer_size_]);

  // Initialize last byte with stream header.
  // A 16-bit window is encoded in a single bit, every other window size in
  // four bits.
  if (params_.lgwin == 16) {
    last_byte_ = 0;
    last_byte_bits_ = 1;
  } else {
    last_byte_ = ((params_.lgwin - 17) << 1) | 1;
    last_byte_bits_ = 4;
  }

  // Initialize distance cache.
  dist_cache_[0] = 4;
  dist_cache_[1] = 11;
  dist_cache_[2] = 15;
  dist_cache_[3] = 16;

  // Initialize hashers.
  switch (params_.quality) {
    case 0:
    case 1: hash_type_ = 1; break;
    case 2:
    case 3: hash_type_ = 2; break;
    case 4: hash_type_ = 3; break;
    case 5:
    case 6: hash_type_ = 4; break;
    case 7: hash_type_ = 5; break;
    case 8: hash_type_ = 6; break;
    case 9: hash_type_ = 7; break;
    default:  // quality > 9
      hash_type_ = (params_.mode == BrotliParams::MODE_TEXT) ? 8 : 9;
  }
  hashers_->Init(hash_type_);
  if (params_.mode == BrotliParams::MODE_TEXT &&
      params_.enable_dictionary) {
    StoreDictionaryWordHashes(params_.enable_transforms);
  }
}
// Intentionally empty: nothing is released explicitly here.
BrotliCompressor::~BrotliCompressor() {
}
// Dictionary shared by all compressor instances; created on first use in
// StoreDictionaryWordHashes() and not deleted anywhere in the code shown here.
StaticDictionary* BrotliCompressor::static_dictionary_ = NULL;
// Makes sure the shared static dictionary exists (building and filling it on
// the first call) and registers it with this compressor's hashers.
// enable_transforms only takes effect on the call that builds the dictionary.
void BrotliCompressor::StoreDictionaryWordHashes(bool enable_transforms) {
  const bool needs_build = (static_dictionary_ == NULL);
  if (needs_build) {
    static_dictionary_ = new StaticDictionary;
    static_dictionary_->Fill(enable_transforms);
  }
  hashers_->SetStaticDictionary(static_dictionary_);
}
// Appends input_buffer[0..input_size) to the ring buffer and advances
// input_pos_ accordingly.
void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
                                             const uint8_t* input_buffer) {
  ringbuffer_->Write(input_buffer, input_size);
  input_pos_ += input_size;

  // Zero a few bytes past the written data so that hashing never depends on
  // uninitialized memory. This keeps compression deterministic and silences
  // uninitialized-memory warnings in Valgrind; without it the output would
  // still be valid, just nondeterministic.
  //
  // Background: the compressor stores short (at most 8 bytes) substrings of
  // the input in a hash table and detects repetitions by looking them up.
  // Every hit is verified against the ring buffer, so a bogus hash entry can
  // only hurt the compression ratio, never correctness. The table is
  // populated as new bytes arrive, but at the last few positions there are
  // not enough bytes yet to form a full-length substring; the zeros written
  // here stand in for the missing bytes.
  //
  // For performance reasons hashing reads data with a LOAD32, which can reach
  // 3 bytes beyond the bytes written so far, hence the 3 bytes cleared.
  //
  // Clearing is only needed, and only safe, during the first lap around the
  // ring buffer (position <= mask). After that the region already holds
  // initialized data, and zeroing it would corrupt bytes that are still in
  // use.
  const size_t write_pos = ringbuffer_->position();
  const bool first_lap = write_pos <= ringbuffer_->mask();
  if (first_lap) {
    memset(ringbuffer_->start() + write_pos, 0, 3);
  }
}
// Processes the input copied into the ring buffer since the previous call
// (input_pos_ - last_processed_pos_ bytes) and appends the resulting commands
// to commands_.
//
// Unless is_last or force_flush is set, output may be deferred: when both the
// command buffer and the ring buffer still have room, the processed position
// is recorded, *out_size is set to 0 and true is returned without emitting
// anything. Otherwise the pending data is flushed through
// WriteMetaBlockInternal(), which fills in *output and *out_size.
//
// Returns false if more than input_block_size() unprocessed bytes are pending
// or if writing the meta-block fails.
bool BrotliCompressor::WriteBrotliData(const bool is_last,
const bool force_flush,
size_t* out_size,
uint8_t** output) {
const size_t bytes = input_pos_ - last_processed_pos_;
const uint8_t* data = ringbuffer_->start();
const size_t mask = ringbuffer_->mask();
if (bytes > input_block_size()) {
return false;
}
// UTF8 mode is only considered when context modeling is enabled.
bool utf8_mode =
params_.enable_context_modeling &&
IsMostlyUTF8(&data[last_processed_pos_ & mask], bytes, kMinUTF8Ratio);
// Literal costs are estimated only when the cost buffer was allocated
// (the constructor does so for quality > 9).
if (literal_cost_.get()) {
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(last_processed_pos_, bytes, mask,
literal_cost_mask_, data,
literal_cost_.get());
} else {
EstimateBitCostsForLiterals(last_processed_pos_, bytes, mask,
literal_cost_mask_,
data, literal_cost_.get());
}
}
double base_min_score = params_.enable_context_modeling ? 8.115 : 4.0;
// New commands are written starting at commands_[num_commands_];
// num_commands_ and last_insert_len_ are updated through the pointers.
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
literal_cost_.get(),
literal_cost_mask_,
max_backward_distance_,
base_min_score,
params_.quality,
hashers_.get(),
hash_type_,
dist_cache_,
&last_insert_len_,
&commands_[num_commands_],
&num_commands_);
// Defer the flush while the command buffer keeps at least half an input
// block of slack and (presumably) one more full input block still fits in
// the ring buffer after the last flush position.
if (!is_last && !force_flush &&
num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
input_pos_ + input_block_size() <= last_flush_pos_ + mask + 1) {
// Everything will happen later.
last_processed_pos_ = input_pos_;
*out_size = 0;
return true;
}
// Create the last insert-only command.
if (last_insert_len_ > 0) {
brotli::Command cmd(last_insert_len_);
commands_[num_commands_++] = cmd;
last_insert_len_ = 0;
}
return WriteMetaBlockInternal(is_last, utf8_mode, out_size, output);
}
// Emits one meta-block covering all input since the last flush
// (input_pos_ - last_flush_pos_ bytes) into the internal storage buffer.
//
// is_last   - whether this is the final meta-block of the stream.
// utf8_mode - selects the literal context mode (UTF8 vs. signed).
// out_size  - receives the number of complete bytes produced.
// output    - receives a pointer to the internal storage buffer; it is only
//             valid until the storage buffer is reused by a later call.
//
// The block is stored uncompressed when a sampled entropy estimate says the
// data is close to incompressible, or when the compressed form turns out
// larger than the raw bytes. Bits of a trailing partial byte are carried over
// to the next call via last_byte_ / last_byte_bits_.
//
// Returns false if any of the Store* helpers fails.
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
                                              const bool utf8_mode,
                                              size_t* out_size,
                                              uint8_t** output) {
  const size_t bytes = input_pos_ - last_flush_pos_;
  const uint8_t* data = ringbuffer_->start();
  const size_t mask = ringbuffer_->mask();
  const size_t max_out_size = 2 * bytes + 500;
  uint8_t* storage = GetBrotliStorage(max_out_size);
  // Resume from the partial byte left over by the previous meta-block.
  storage[0] = last_byte_;
  int storage_ix = last_byte_bits_;

  // Incompressibility heuristic: with very few commands, nearly all bytes
  // literals and a high sampled byte entropy, store the data uncompressed.
  bool uncompressed = false;
  if (num_commands_ < (bytes >> 8) + 2) {
    int num_literals = 0;
    for (int i = 0; i < num_commands_; ++i) {
      num_literals += commands_[i].insert_len_;
    }
    if (num_literals > 0.99 * bytes) {
      int literal_histo[256] = { 0 };
      static const int kSampleRate = 13;
      static const double kMinEntropy = 7.92;
      // Must be recomputed on every call: it depends on this meta-block's
      // size. (A function-local static would be initialized only once and
      // then reused with a stale 'bytes' for every later meta-block.)
      const double bit_cost_threshold = bytes * kMinEntropy / kSampleRate;
      // Sample every kSampleRate-th byte; size_t avoids truncating the
      // position counters.
      for (size_t i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
        ++literal_histo[data[i & mask]];
      }
      if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
        uncompressed = true;
      }
    }
  }

  if (bytes == 0) {
    // Empty meta-block: header only, padded to a byte boundary.
    if (!StoreCompressedMetaBlockHeader(is_last, 0, &storage_ix, &storage[0])) {
      return false;
    }
    storage_ix = (storage_ix + 7) & ~7;
  } else if (uncompressed) {
    if (!StoreUncompressedMetaBlock(is_last,
                                    data, last_flush_pos_, mask, bytes,
                                    &storage_ix,
                                    &storage[0])) {
      return false;
    }
  } else {
    // Save the state of the distance cache in case we need to restore it for
    // emitting an uncompressed block.
    int saved_dist_cache[4];
    memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
    int num_direct_distance_codes = 0;
    int distance_postfix_bits = 0;
    if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
      num_direct_distance_codes = 12;
      distance_postfix_bits = 1;
      RecomputeDistancePrefixes(commands_.get(),
                                num_commands_,
                                num_direct_distance_codes,
                                distance_postfix_bits);
    }
    int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
    MetaBlockSplit mb;
    if (params_.greedy_block_split) {
      BuildMetaBlockGreedy(data, last_flush_pos_, mask,
                           commands_.get(), num_commands_,
                           &mb);
    } else {
      BuildMetaBlock(data, last_flush_pos_, mask,
                     prev_byte_, prev_byte2_,
                     commands_.get(), num_commands_,
                     literal_context_mode,
                     params_.enable_context_modeling,
                     &mb);
    }
    if (params_.quality >= 3) {
      OptimizeHistograms(num_direct_distance_codes,
                         distance_postfix_bits,
                         &mb);
    }
    if (!StoreMetaBlock(data, last_flush_pos_, bytes, mask,
                        prev_byte_, prev_byte2_,
                        is_last,
                        num_direct_distance_codes,
                        distance_postfix_bits,
                        literal_context_mode,
                        commands_.get(), num_commands_,
                        mb,
                        &storage_ix,
                        &storage[0])) {
      return false;
    }
    // Compression expanded the data: discard the result and fall back to an
    // uncompressed meta-block.
    if (bytes + 4 < (storage_ix >> 3)) {
      // Restore the distance cache and last byte.
      memcpy(dist_cache_, saved_dist_cache, sizeof(dist_cache_));
      storage[0] = last_byte_;
      storage_ix = last_byte_bits_;
      if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
                                      bytes, &storage_ix, &storage[0])) {
        return false;
      }
    }
  }

  // Remember the trailing partial byte and advance the stream state.
  last_byte_ = storage[storage_ix >> 3];
  last_byte_bits_ = storage_ix & 7;
  last_flush_pos_ = input_pos_;
  last_processed_pos_ = input_pos_;
  prev_byte_ = data[(last_flush_pos_ - 1) & mask];
  prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
  num_commands_ = 0;
  *output = &storage[0];
  *out_size = storage_ix >> 3;
  return true;
}
// Feeds input_buffer[0..input_size) to the compressor, forces a flush and
// copies the produced meta-block into encoded_buffer.
// On entry *encoded_size is the capacity of encoded_buffer; on success it is
// set to the number of bytes written. Returns false if compression fails or
// the output does not fit.
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
                                      const uint8_t* input_buffer,
                                      const bool is_last,
                                      size_t* encoded_size,
                                      uint8_t* encoded_buffer) {
  CopyInputToRingBuffer(input_size, input_buffer);

  size_t produced = 0;
  uint8_t* produced_data;
  const bool ok = WriteBrotliData(is_last, /* force_flush = */ true,
                                  &produced, &produced_data);
  if (!ok) {
    return false;
  }
  if (produced > *encoded_size) {
    return false;
  }
  if (produced > 0) {
    memcpy(encoded_buffer, produced_data, produced);
  }
  *encoded_size = produced;
  return true;
}
// Writes input_buffer[0..input_size) verbatim into encoded_buffer, preceded
// by a small bit header, without running it through the compressor.
// On entry *encoded_size is the capacity of encoded_buffer; on success it is
// the number of bytes written. Fails if input_size exceeds 1 << 24 or the
// buffer cannot hold input_size + 6 bytes.
// NOTE(review): the header bit values presumably follow the metadata
// meta-block layout of the Brotli format -- confirm against the spec.
bool BrotliCompressor::WriteMetadata(const size_t input_size,
                                     const uint8_t* input_buffer,
                                     const bool is_last,
                                     size_t* encoded_size,
                                     uint8_t* encoded_buffer) {
  const bool too_large = input_size > (1 << 24);
  if (too_large || input_size + 6 > *encoded_size) {
    return false;
  }

  // Continue from the partial byte left by the previous meta-block.
  int bit_pos = last_byte_bits_;
  encoded_buffer[0] = last_byte_;
  WriteBits(1, 0, &bit_pos, encoded_buffer);
  WriteBits(2, 3, &bit_pos, encoded_buffer);
  WriteBits(1, 0, &bit_pos, encoded_buffer);

  if (input_size != 0) {
    // Length field: byte count (2 bits) followed by input_size - 1 in that
    // many bytes, then the payload starting at the next byte boundary.
    const size_t nbits = Log2Floor(input_size - 1) + 1;
    const size_t nbytes = (nbits + 7) / 8;
    WriteBits(2, nbytes, &bit_pos, encoded_buffer);
    WriteBits(8 * nbytes, input_size - 1, &bit_pos, encoded_buffer);
    const size_t header_bytes = (bit_pos + 7) >> 3;
    memcpy(&encoded_buffer[header_bytes], input_buffer, input_size);
    *encoded_size = header_bytes + input_size;
  } else {
    WriteBits(2, 0, &bit_pos, encoded_buffer);
    *encoded_size = (bit_pos + 7) >> 3;
  }

  if (is_last) {
    // One extra byte of value 3 closes the stream.
    encoded_buffer[(*encoded_size)++] = 3;
  }
  // The output ends on a byte boundary, so nothing is carried over.
  last_byte_ = 0;
  last_byte_bits_ = 0;
  return true;
}
// Terminates the stream by emitting a meta-block with no input and the
// is_last flag set; size and buffer semantics are those of WriteMetaBlock().
bool BrotliCompressor::FinishStream(
    size_t* encoded_size, uint8_t* encoded_buffer) {
  const size_t no_input_bytes = 0;
  const bool is_last = true;
  return WriteMetaBlock(no_input_bytes, NULL, is_last,
                        encoded_size, encoded_buffer);
}
// Compresses input_buffer into encoded_buffer in one call.
//
// On entry *encoded_size is the capacity of encoded_buffer; on success it is
// replaced by the compressed length. Returns 1 on success and 0 on failure
// (including a zero-capacity output buffer).
int BrotliCompressBuffer(BrotliParams params,
                         size_t input_size,
                         const uint8_t* input_buffer,
                         size_t* encoded_size,
                         uint8_t* encoded_buffer) {
  if (*encoded_size == 0) {
    // Output buffer needs at least one byte.
    return 0;
  }
  // BrotliCompress() creates its own BrotliCompressor, so none is built here.
  BrotliMemIn in(input_buffer, input_size);
  BrotliMemOut out(encoded_buffer, *encoded_size);
  if (!BrotliCompress(params, &in, &out)) {
    return 0;
  }
  *encoded_size = out.position();
  return 1;
}
// Pulls up to one input block from r into the compressor's ring buffer and
// returns the number of bytes copied (0 if the very first read yields NULL).
//
// Reading is repeated until the block is full or a read returns NULL, so the
// compressed output does not depend on how r->Read splits the input.
size_t CopyOneBlockToRingBuffer(BrotliIn* r, BrotliCompressor* compressor) {
  size_t still_wanted = compressor->input_block_size();
  size_t copied = 0;
  do {
    size_t chunk_len = 0;
    const uint8_t* chunk = reinterpret_cast<const uint8_t*>(
        r->Read(still_wanted, &chunk_len));
    if (chunk == NULL) {
      break;
    }
    compressor->CopyInputToRingBuffer(chunk_len, chunk);
    copied += chunk_len;
    still_wanted -= chunk_len;
  } while (still_wanted > 0);
  return copied;
}
// Probes the input with a zero-length read; a NULL result is taken to mean
// that no more data is available.
bool BrotliInIsFinished(BrotliIn* r) {
  size_t ignored_len;
  return NULL == r->Read(0, &ignored_len);
}
// Streams all data from in through a fresh compressor and writes every
// produced meta-block to out. Returns non-zero on success and zero if
// either compression or writing fails.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
  BrotliCompressor compressor(params);
  bool reached_end = false;
  do {
    const size_t consumed = CopyOneBlockToRingBuffer(in, &compressor);
    // The stream ends when nothing could be read or the source is drained.
    reached_end = (consumed == 0) || BrotliInIsFinished(in);

    size_t produced = 0;
    uint8_t* chunk = NULL;
    if (!compressor.WriteBrotliData(reached_end,
                                    /* force_flush = */ false,
                                    &produced, &chunk)) {
      return false;
    }
    if (produced > 0 && !out->Write(chunk, produced)) {
      return false;
    }
  } while (!reached_end);
  return true;
}
} // namespace brotli

View File

@ -0,0 +1,179 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// API for Brotli compression
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <vector>
#include "./command.h"
#include "./hash.h"
#include "./ringbuffer.h"
#include "./static_dict.h"
#include "./streams.h"
namespace brotli {
static const int kMaxWindowBits = 24;
static const int kMinWindowBits = 16;
static const int kMinInputBlockBits = 16;
static const int kMaxInputBlockBits = 24;
// Tuning knobs for the encoder; a default-constructed instance selects text
// mode at the highest quality.
struct BrotliParams {
  enum Mode {
    MODE_TEXT = 0,
    MODE_FONT = 1,
  };

  BrotliParams()
      : mode(MODE_TEXT),
        quality(11),
        lgwin(22),
        lgblock(0),
        enable_dictionary(true),
        enable_transforms(false),
        greedy_block_split(false),
        enable_context_modeling(true) {}

  Mode mode;

  // Compression-speed vs compression-density tradeoff: higher quality means
  // slower compression. Range is 0 to 11.
  int quality;

  // Base 2 logarithm of the sliding window size. Range is 16 to 24.
  int lgwin;

  // Base 2 logarithm of the maximum input block size. Range is 16 to 24.
  // A value of 0 lets the encoder pick one based on the quality.
  int lgblock;

  // The following settings are respected only if quality > 9.
  bool enable_dictionary;
  bool enable_transforms;
  bool greedy_block_split;
  bool enable_context_modeling;
};
// Stateful Brotli encoder: input is copied into an internal ring buffer and
// emitted as a sequence of meta-blocks.
// An instance can not be reused for multiple brotli streams.
class BrotliCompressor {
public:
explicit BrotliCompressor(BrotliParams params);
~BrotliCompressor();
// The maximum input size that can be processed at once.
size_t input_block_size() const { return 1 << params_.lgblock; }
// Encodes the data in input_buffer as a meta-block and writes it to
// encoded_buffer (*encoded_size should be set to the size of
// encoded_buffer) and sets *encoded_size to the number of bytes that
// was written. Returns false if there was an error (including an output
// buffer that is too small) and true otherwise.
bool WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Writes a metadata meta-block containing the given input to encoded_buffer.
// *encoded_size should be set to the size of the encoded_buffer.
// Sets *encoded_size to the number of bytes that was written.
// Returns false if the input is larger than 2^24 bytes or does not fit into
// encoded_buffer together with its header, and true otherwise.
// Note that the given input data will not be part of the sliding window and
// thus no backward references can be made to this data from subsequent
// metablocks.
bool WriteMetadata(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Writes a zero-length meta-block with end-of-input bit set to the
// internal output buffer and copies the output buffer to encoded_buffer
// (*encoded_size should be set to the size of encoded_buffer) and sets
// *encoded_size to the number of bytes written. Returns false if there was
// an error and true otherwise.
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
// Copies the given input data to the internal ring buffer of the compressor.
// No processing of the data occurs at this time and this function can be
// called multiple times before calling WriteBrotliData() to process the
// accumulated input. At most input_block_size() bytes of input data can be
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
void CopyInputToRingBuffer(const size_t input_size,
const uint8_t* input_buffer);
// Processes the accumulated input data and sets *out_size to the length of
// the new output meta-block, or to zero if no new output meta-block was
// created (in this case the processed input data is buffered internally).
// If *out_size is positive, *output points to the start of the output data.
// Returns false if the size of the input data is larger than
// input_block_size() or if there was an error during writing the output.
// If is_last or force_flush is true, an output meta-block is always created.
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
// No-op, but we keep it here for API backward-compatibility.
void WriteStreamHeader() {}
private:
// Initializes the hasher with the hashes of dictionary words.
void StoreDictionaryWordHashes(bool enable_transforms);
uint8_t* GetBrotliStorage(size_t size);
bool WriteMetaBlockInternal(const bool is_last,
const bool utf8_mode,
size_t* out_size,
uint8_t** output);
BrotliParams params_;
int max_backward_distance_;
std::unique_ptr<Hashers> hashers_;
int hash_type_;
size_t input_pos_;
std::unique_ptr<RingBuffer> ringbuffer_;
std::unique_ptr<float[]> literal_cost_;
size_t literal_cost_mask_;
size_t cmd_buffer_size_;
std::unique_ptr<Command[]> commands_;
int num_commands_;
int last_insert_len_;
// Input positions recorded after each WriteBrotliData() call.
size_t last_flush_pos_;
size_t last_processed_pos_;
int dist_cache_[4];
// Trailing partial byte of the most recently produced output and the number
// of bits of it that are in use; the next meta-block continues from here.
uint8_t last_byte_;
uint8_t last_byte_bits_;
// The two ring-buffer bytes immediately before last_flush_pos_.
uint8_t prev_byte_;
uint8_t prev_byte2_;
int storage_size_;
std::unique_ptr<uint8_t[]> storage_;
static StaticDictionary *static_dictionary_;
};
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBuffer(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Same as above, but uses the specified input and output classes instead
// of reading from and writing to pre-allocated memory buffers.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_

View File

@ -0,0 +1,356 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implementation of parallel Brotli compressor.
#include "./encode_parallel.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
#include "./cluster.h"
#include "./context.h"
#include "./metablock.h"
#include "./transform.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
#include "./literal_cost.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
namespace {
// Decodes one symbol from the start of input (size bytes available).
//
// For a well-formed, shortest-form UTF-8 sequence the code point is stored in
// *symbol and the sequence length (1..4) is returned. A zero byte is not
// accepted as ASCII. Anything else consumes exactly one byte and stores the
// special value 0x110000 | input[0], which lies above the UTF-8 code space.
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
  const int lead = input[0];
  int code = 0;
  int consumed = 0;

  if ((lead & 0x80) == 0) {
    // ASCII
    code = lead;
    if (code > 0) {
      consumed = 1;
    }
  } else if ((lead & 0xe0) == 0xc0) {
    // 2-byte UTF8
    if (size > 1 && (input[1] & 0xc0) == 0x80) {
      code = ((lead & 0x1f) << 6) | (input[1] & 0x3f);
      if (code > 0x7f) {
        consumed = 2;
      }
    }
  } else if ((lead & 0xf0) == 0xe0) {
    // 3-byte UTF8
    if (size > 2 &&
        (input[1] & 0xc0) == 0x80 &&
        (input[2] & 0xc0) == 0x80) {
      code = ((lead & 0x0f) << 12) |
             ((input[1] & 0x3f) << 6) |
             (input[2] & 0x3f);
      if (code > 0x7ff) {
        consumed = 3;
      }
    }
  } else if ((lead & 0xf8) == 0xf0) {
    // 4-byte UTF8
    if (size > 3 &&
        (input[1] & 0xc0) == 0x80 &&
        (input[2] & 0xc0) == 0x80 &&
        (input[3] & 0xc0) == 0x80) {
      code = ((lead & 0x07) << 18) |
             ((input[1] & 0x3f) << 12) |
             ((input[2] & 0x3f) << 6) |
             (input[3] & 0x3f);
      if (code > 0xffff && code <= 0x10ffff) {
        consumed = 4;
      }
    }
  }

  if (consumed == 0) {
    // Not UTF8, emit a special symbol above the UTF8-code space
    code = 0x110000 | lead;
    consumed = 1;
  }
  *symbol = code;
  return consumed;
}
// Returns true if at least min_fraction of the data is UTF8-encoded.
// Returns true if more than min_fraction of the bytes in data[0..length)
// belong to sequences that ParseAsUTF8() accepts as UTF8.
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
  size_t utf8_bytes = 0;
  size_t offset = 0;
  while (offset < length) {
    int code_point;
    const int consumed = ParseAsUTF8(&code_point, data + offset,
                                     length - offset);
    // Values of 0x110000 and above mark bytes that did not parse as UTF8.
    if (code_point < 0x110000) {
      utf8_bytes += consumed;
    }
    offset += consumed;
  }
  return utf8_bytes > min_fraction * length;
}
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
int num_direct_distance_codes,
int distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (int i = 0; i < cmds->size(); ++i) {
Command* cmd = &(*cmds)[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
&cmd->dist_extra_);
}
}
}
// Compresses one input block on its own and writes the result to
// encoded_buffer.
//
// prefix_buffer[0..prefix_size) is copied in front of the block so that it is
// available as preceding data; only the block itself is encoded. When
// is_first is set the output starts with the bits that encode params.lgwin.
// When is_last is not set an empty metadata meta-block is appended so that
// the output ends at a byte boundary.
//
// On entry *encoded_size is the capacity of encoded_buffer; on success it is
// replaced by the number of bytes written. Returns false if block_size is 0,
// if storing the meta-block fails, or if the output does not fit.
bool WriteMetaBlockParallel(const BrotliParams& params,
const size_t block_size,
const uint8_t* input_buffer,
const size_t prefix_size,
const uint8_t* prefix_buffer,
const StaticDictionary* static_dict,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (block_size == 0) {
return false;
}
const size_t input_size = block_size;
// Copy prefix + next input block into a continuous area.
size_t input_pos = prefix_size;
std::vector<uint8_t> input(prefix_size + input_size);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
const size_t mask = std::numeric_limits<size_t>::max() >> 1;
// The two bytes preceding the block (0 when the prefix is too short).
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
// Decide about UTF8 mode.
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[input_pos], input_size, kMinUTF8Ratio);
// Compute literal costs.
std::vector<float> literal_cost(prefix_size + input_size);
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(input_pos, input_size, mask, mask,
&input[0], &literal_cost[0]);
} else {
EstimateBitCostsForLiterals(input_pos, input_size, mask, mask,
&input[0], &literal_cost[0]);
}
// Initialize hashers.
int hash_type = 9;
switch (params.mode) {
case BrotliParams::MODE_TEXT: hash_type = 8; break;
case BrotliParams::MODE_FONT: hash_type = 9; break;
default: break;
}
std::unique_ptr<Hashers> hashers(new Hashers());
hashers->Init(hash_type);
hashers->SetStaticDictionary(static_dict);
// Compute backward references.
int last_insert_len = 0;
int num_commands = 0;
double base_min_score = 8.115;
int max_backward_distance = (1 << params.lgwin) - 16;
int dist_cache[4] = { -4, -4, -4, -4 };
// Scratch space for the commands; trimmed to num_commands afterwards.
std::vector<Command> commands((input_size + 1) >> 1);
CreateBackwardReferences(
input_size, input_pos,
&input[0], mask,
&literal_cost[0], mask,
max_backward_distance,
base_min_score,
params.quality,
hashers.get(),
hash_type,
dist_cache,
&last_insert_len,
&commands[0],
&num_commands);
commands.resize(num_commands);
// Trailing literals that are not followed by a copy get their own command.
if (last_insert_len > 0) {
commands.push_back(Command(last_insert_len));
}
// Build the meta-block.
MetaBlockSplit mb;
int num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
int distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(&commands,
num_direct_distance_codes,
distance_postfix_bits);
if (params.greedy_block_split) {
BuildMetaBlockGreedy(&input[0], input_pos, mask,
commands.data(), commands.size(),
&mb);
} else {
BuildMetaBlock(&input[0], input_pos, mask,
prev_byte, prev_byte2,
commands.data(), commands.size(),
literal_context_mode,
true,
&mb);
}
// Set up the temporary output storage.
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
// Only the first block carries the bits that encode the window size:
// a single 0 bit for lgwin == 16, otherwise 4 bits derived from lgwin.
int first_byte = 0;
int first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
first_byte_bits = 1;
} else {
first_byte = ((params.lgwin - 17) << 1) | 1;
first_byte_bits = 4;
}
}
storage[0] = first_byte;
int storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
if (!StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands.data(), commands.size(),
mb,
&storage_ix, &storage[0])) {
return false;
}
// If this is not the last meta-block, store an empty metadata
// meta-block so that the meta-block will end at a byte boundary.
if (!is_last) {
StoreSyncMetaBlock(&storage_ix, &storage[0]);
}
// If the compressed data is too large, fall back to an uncompressed
// meta-block.
size_t output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
// Restart from the stream header and overwrite the compressed attempt.
storage[0] = first_byte;
storage_ix = first_byte_bits;
if (!StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0])) {
return false;
}
output_size = storage_ix >> 3;
}
// Copy the temporary output with size-check to the output.
if (output_size > *encoded_size) {
return false;
}
memcpy(encoded_buffer, &storage[0], output_size);
*encoded_size = output_size;
return true;
}
} // namespace
// Compresses input_buffer block by block, each block independently of the
// others, and concatenates the results in encoded_buffer.
//
// On entry *encoded_size is the capacity of encoded_buffer; on success it is
// replaced by the compressed length. Returns non-zero on success and zero on
// failure (zero-capacity output, a block that fails to compress, or an output
// buffer that is too small).
int BrotliCompressBufferParallel(BrotliParams params,
                                 size_t input_size,
                                 const uint8_t* input_buffer,
                                 size_t* encoded_size,
                                 uint8_t* encoded_buffer) {
  if (*encoded_size == 0) {
    // Output buffer needs at least one byte.
    return 0;
  }
  if (input_size == 0) {
    // Fixed one-byte output for empty input (presumably the window-size bit
    // followed by an empty last meta-block).
    encoded_buffer[0] = 6;
    *encoded_size = 1;
    return 1;
  }

  // Sanitize params: clamp the window size into its supported range.
  params.lgwin = std::max(kMinWindowBits,
                          std::min(params.lgwin, kMaxWindowBits));
  if (params.lgblock == 0) {
    // No explicit block size: use 16, or a larger one for high quality.
    const bool use_large_blocks = params.quality >= 9 && params.lgwin > 16;
    params.lgblock = use_large_blocks ? std::min(21, params.lgwin) : 16;
  } else {
    params.lgblock = std::max(kMinInputBlockBits,
                              std::min(params.lgblock, kMaxInputBlockBits));
  }

  const size_t max_input_block_size = 1 << params.lgblock;
  std::vector<std::vector<uint8_t> > compressed_pieces;
  StaticDictionary dict;
  dict.Fill(params.enable_transforms);

  // Compress block-by-block independently.
  size_t pos = 0;
  while (pos < input_size) {
    const size_t input_block_size =
        std::min(max_input_block_size, input_size - pos);
    const bool is_first_block = (pos == 0);
    const bool is_last_block = (pos + input_block_size == input_size);
    size_t out_size = 1.2 * input_block_size + 1024;
    std::vector<uint8_t> out(out_size);
    // Everything before pos is handed over as the prefix of this block.
    const bool ok = WriteMetaBlockParallel(params,
                                           input_block_size,
                                           &input_buffer[pos],
                                           pos,
                                           input_buffer,
                                           &dict,
                                           is_first_block,
                                           is_last_block,
                                           &out_size,
                                           &out[0]);
    if (!ok) {
      return 0;
    }
    out.resize(out_size);
    compressed_pieces.push_back(out);
    pos += input_block_size;
  }

  // Piece together the output.
  size_t out_pos = 0;
  for (const std::vector<uint8_t>& piece : compressed_pieces) {
    if (out_pos + piece.size() > *encoded_size) {
      return 0;
    }
    memcpy(&encoded_buffer[out_pos], &piece[0], piece.size());
    out_pos += piece.size();
  }
  *encoded_size = out_pos;
  return 1;
}
} // namespace brotli

View File

@ -0,0 +1,37 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// API for parallel Brotli compression
// Note that this is only a proof of concept currently and not part of the
// final API yet.
#ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
#define BROTLI_ENC_ENCODE_PARALLEL_H_
#include <stddef.h>
#include <stdint.h>
#include "./encode.h"
namespace brotli {
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_PARALLEL_H_

View File

@ -0,0 +1,492 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entropy encoding (Huffman) utilities.
#include "./entropy_encode.h"
#include <stdint.h>
#include <algorithm>
#include <limits>
#include <vector>
#include <cstdlib>
#include "./histogram.h"
namespace brotli {
namespace {

// One node of the Huffman tree under construction. A node with a negative
// index_left_ is a leaf whose symbol is stored in index_right_or_value_;
// otherwise both fields are indices of child nodes in the node pool.
struct HuffmanTree {
  // Leaves all fields uninitialized.
  HuffmanTree() {}

  HuffmanTree(int count, int16_t left, int16_t right)
      : total_count_(count),
        index_left_(left),
        index_right_or_value_(right) {}

  int total_count_;
  int16_t index_left_;
  int16_t index_right_or_value_;
};

// Ordering predicate: nodes with a smaller population count come first.
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
  return v1.total_count_ > v0.total_count_;
}

// Walks the subtree rooted at p (children are looked up in pool) and stores
// for every leaf its distance from the starting node, offset by level, into
// depth[symbol].
void SetDepth(const HuffmanTree &p,
              HuffmanTree *pool,
              uint8_t *depth,
              int level) {
  if (p.index_left_ < 0) {
    // Leaf: record the depth of its symbol.
    depth[p.index_right_or_value_] = level;
    return;
  }
  const int child_level = level + 1;
  SetDepth(pool[p.index_left_], pool, depth, child_level);
  SetDepth(pool[p.index_right_or_value_], pool, depth, child_level);
}

}  // namespace
// This function will create a Huffman tree.
//
// data[0..length) holds the population counts; on return depth[i] holds the
// code length assigned to every symbol i with a non-zero count. Entries of
// depth for symbols with a zero count are not written. If no count is
// non-zero at all, depth is left untouched.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is to be faked as the minimum value
// and this minimum value is raised until the tree matches the
// maximum length requirement.
//
// This algorithm is not of excellent performance for very long data blocks,
// especially when population counts are longer than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
                       const int length,
                       const int tree_limit,
                       uint8_t *depth) {
  // For block sizes below 64 kB, we never need to do a second iteration
  // of this loop. Probably all of our block sizes will be smaller than
  // that, so this loop is mostly of academic interest. If we actually
  // would need this, we would be better off with the Katajainen algorithm.
  for (int count_limit = 1; ; count_limit *= 2) {
    std::vector<HuffmanTree> tree;
    tree.reserve(2 * length + 1);
    for (int i = length - 1; i >= 0; --i) {
      if (data[i]) {
        const int count = std::max(data[i], count_limit);
        tree.push_back(HuffmanTree(count, -1, i));
      }
    }
    const int n = tree.size();
    if (n == 0) {
      // No symbol is used. Building the tree below would index tree[-1],
      // so stop here and leave depth as it is.
      break;
    }
    if (n == 1) {
      depth[tree[0].index_right_or_value_] = 1;  // Only one element.
      break;
    }
    std::stable_sort(tree.begin(), tree.end(), SortHuffmanTree);
    // The nodes are:
    // [0, n): the sorted leaf nodes that we start with.
    // [n]: we add a sentinel here.
    // [n + 1, 2n): new parent nodes are added here, starting from
    //              (n+1). These are naturally in ascending order.
    // [2n]: we add a sentinel at the end as well.
    // There will be (2n+1) elements at the end.
    const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
    tree.push_back(sentinel);
    tree.push_back(sentinel);
    int i = 0;      // Points to the next leaf node.
    int j = n + 1;  // Points to the next non-leaf node.
    for (int k = n - 1; k > 0; --k) {
      // Pick the two cheapest remaining nodes; the sentinels guarantee that
      // an exhausted range never wins the comparison.
      int left, right;
      if (tree[i].total_count_ <= tree[j].total_count_) {
        left = i;
        ++i;
      } else {
        left = j;
        ++j;
      }
      if (tree[i].total_count_ <= tree[j].total_count_) {
        right = i;
        ++i;
      } else {
        right = j;
        ++j;
      }
      // The sentinel node becomes the parent node.
      int j_end = tree.size() - 1;
      tree[j_end].total_count_ =
          tree[left].total_count_ + tree[right].total_count_;
      tree[j_end].index_left_ = left;
      tree[j_end].index_right_or_value_ = right;
      // Add back the last sentinel node.
      tree.push_back(sentinel);
    }
    // The last parent created, at index 2n - 1, is the root.
    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
    // We need to pack the Huffman tree in tree_limit bits.
    // If this was not successful, add fake entities to the lowest values
    // and retry.
    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
      break;
    }
  }
}
// Reverses the elements of *v in the half-open index range [start, end).
// Ranges with fewer than two elements are left unchanged.
void Reverse(std::vector<uint8_t>* v, int start, int end) {
  std::vector<uint8_t>& items = *v;
  for (int lo = start, hi = end - 1; lo < hi; ++lo, --hi) {
    const uint8_t held = items[lo];
    items[lo] = items[hi];
    items[hi] = held;
  }
}
// Appends the encoding of `repetitions` copies of the non-zero code length
// `value` to *tree, with one matching entry per symbol in *extra_bits_data.
//
// The value is written literally first when it differs from previous_value.
// Fewer than three remaining copies are written literally; otherwise repeat
// code 16 is used, each symbol carrying 2 extra bits.
void WriteHuffmanTreeRepetitions(
    const int previous_value,
    const int value,
    int repetitions,
    std::vector<uint8_t> *tree,
    std::vector<uint8_t> *extra_bits_data) {
  int remaining = repetitions;
  if (previous_value != value) {
    tree->push_back(value);
    extra_bits_data->push_back(0);
    --remaining;
  }
  if (remaining == 7) {
    // Write one copy literally so that six copies remain for the repeat code.
    tree->push_back(value);
    extra_bits_data->push_back(0);
    --remaining;
  }
  if (remaining >= 3) {
    const int run_begin = tree->size();
    // The digits come out least significant first ...
    for (int left = remaining - 3; left >= 0; left = (left >> 2) - 1) {
      tree->push_back(16);
      extra_bits_data->push_back(left & 0x3);
    }
    // ... so flip the freshly written part of both vectors.
    Reverse(tree, run_begin, tree->size());
    Reverse(extra_bits_data, run_begin, tree->size());
    return;
  }
  while (remaining-- > 0) {
    tree->push_back(value);
    extra_bits_data->push_back(0);
  }
}
// Appends the encoding of `repetitions` zero code lengths to *tree, with one
// matching entry per symbol in *extra_bits_data.
//
// Fewer than three zeros are written literally; otherwise repeat code 17 is
// used, each symbol carrying 3 extra bits.
void WriteHuffmanTreeRepetitionsZeros(
    int repetitions,
    std::vector<uint8_t> *tree,
    std::vector<uint8_t> *extra_bits_data) {
  int remaining = repetitions;
  if (remaining == 11) {
    // Write one zero literally so that ten remain for the repeat code.
    tree->push_back(0);
    extra_bits_data->push_back(0);
    --remaining;
  }
  if (remaining >= 3) {
    const int run_begin = tree->size();
    // The digits come out least significant first ...
    for (int left = remaining - 3; left >= 0; left = (left >> 3) - 1) {
      tree->push_back(17);
      extra_bits_data->push_back(left & 0x7);
    }
    // ... so flip the freshly written part of both vectors.
    Reverse(tree, run_begin, tree->size());
    Reverse(extra_bits_data, run_begin, tree->size());
    return;
  }
  while (remaining-- > 0) {
    tree->push_back(0);
    extra_bits_data->push_back(0);
  }
}
// Adjusts the population counts in counts[0..length) in place so that the
// code lengths derived from them contain longer runs of equal values.
//
// Returns 1 on success (including the cases where the histogram is left
// unchanged because it is small) and 0 if the scratch buffer cannot be
// allocated.
int OptimizeHuffmanCountsForRle(int length, int* counts) {
  // Histograms with fewer than 16 used symbols are left alone.
  int used_symbols = 0;
  for (int idx = 0; idx < length; ++idx) {
    if (counts[idx] != 0) {
      ++used_symbols;
    }
  }
  if (used_symbols < 16) {
    return 1;
  }

  // From here on counts[0..length - 1] does not have trailing zeros.
  while (length > 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return 1;  // All zeros.
  }

  {
    int nonzeros = 0;
    int smallest_nonzero = 1 << 30;
    for (int idx = 0; idx < length; ++idx) {
      if (counts[idx] == 0) {
        continue;
      }
      ++nonzeros;
      if (counts[idx] < smallest_nonzero) {
        smallest_nonzero = counts[idx];
      }
    }
    if (nonzeros < 5) {
      // Small histogram will model it well.
      return 1;
    }
    const int zeros = length - nonzeros;
    if (smallest_nonzero < 4 && zeros < 6) {
      // Fill isolated zeros that sit between two non-zero neighbours.
      for (int idx = 1; idx < length - 1; ++idx) {
        const bool isolated_zero = counts[idx] == 0 &&
                                   counts[idx - 1] != 0 &&
                                   counts[idx + 1] != 0;
        if (isolated_zero) {
          counts[idx] = 1;
        }
      }
    }
    if (nonzeros < 28) {
      return 1;
    }
  }

  // 2) Mark all population counts that already can be encoded with an rle
  // code, so that step 3 does not spoil them: runs of at least 5 zeros and
  // runs of at least 7 equal non-zero values.
  uint8_t* good_for_rle = static_cast<uint8_t*>(calloc(length, 1));
  if (good_for_rle == NULL) {
    return 0;
  }
  for (int run_begin = 0; run_begin < length; ) {
    const int run_value = counts[run_begin];
    int run_end = run_begin + 1;
    while (run_end < length && counts[run_end] == run_value) {
      ++run_end;
    }
    const int min_run = (run_value == 0) ? 5 : 7;
    if (run_end - run_begin >= min_run) {
      for (int p = run_begin; p < run_end; ++p) {
        good_for_rle[p] = 1;
      }
    }
    run_begin = run_end;
  }

  // 3) Replace those population counts that lead to more rle codes.
  // Math here is in 24.8 fixed point representation.
  const int kStreakLimit = 1240;
  int run = 0;      // Length of the stride collected so far.
  int run_sum = 0;  // Sum of the counts in that stride.
  int target = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  for (int idx = 0; idx <= length; ++idx) {
    const bool at_end = (idx == length);
    const bool stride_ends =
        at_end ||
        good_for_rle[idx] ||
        (idx != 0 && good_for_rle[idx - 1]) ||
        abs(256 * counts[idx] - target) >= kStreakLimit;
    if (stride_ends) {
      if (run >= 4 || (run >= 3 && run_sum == 0)) {
        // The stride must end, collapse what we have, if we have enough (4).
        int flattened;
        if (run_sum == 0) {
          // Don't make an all zeros stride to be upgraded to ones.
          flattened = 0;
        } else {
          flattened = (run_sum + run / 2) / run;
          if (flattened < 1) {
            flattened = 1;
          }
        }
        // counts[idx] already belongs to the next stride, so stop before it.
        for (int p = idx - run; p < idx; ++p) {
          counts[p] = flattened;
        }
      }
      run = 0;
      run_sum = 0;
      if (idx < length - 2) {
        // All interesting strides have a count of at least 4,
        // at least when non-zeros.
        target =
            256 * (counts[idx] + counts[idx + 1] + counts[idx + 2]) / 3 + 420;
      } else if (idx < length) {
        target = 256 * counts[idx];
      } else {
        target = 0;
      }
    }
    ++run;
    if (!at_end) {
      run_sum += counts[idx];
      if (run >= 4) {
        target = (256 * run_sum + run / 2) / run;
      }
      if (run == 4) {
        target += 120;
      }
    }
  }
  free(good_for_rle);
  return 1;
}
// Scans the code lengths in depth[0..length) and decides, separately for
// zero and non-zero values, whether run-length coding should be used.
//
// Only runs of at least 3 zeros or at least 4 equal non-zero values count.
// Each counted run is charged 2, and a kind is enabled when the remaining
// total is greater than 2.
static void DecideOverRleUse(const uint8_t* depth, const int length,
                             bool *use_rle_for_non_zero,
                             bool *use_rle_for_zero) {
  int zero_run_total = 0;
  int zero_run_count = 0;
  int nonzero_run_total = 0;
  int nonzero_run_count = 0;

  const uint32_t end = length;
  uint32_t pos = 0;
  while (pos < end) {
    const int value = depth[pos];
    uint32_t next = pos + 1;
    while (next < end && depth[next] == value) {
      ++next;
    }
    const int reps = next - pos;
    if (value == 0) {
      if (reps >= 3) {
        zero_run_total += reps;
        ++zero_run_count;
      }
    } else if (reps >= 4) {
      nonzero_run_total += reps;
      ++nonzero_run_count;
    }
    pos = next;
  }

  *use_rle_for_non_zero = (nonzero_run_total - 2 * nonzero_run_count) > 2;
  *use_rle_for_zero = (zero_run_total - 2 * zero_run_count) > 2;
}
// Converts the code lengths in depth[0..length) into the symbol sequence
// used to store a Huffman tree, appending symbols to *tree and their extra
// bits to *extra_bits_data. Trailing zero code lengths are dropped.
void WriteHuffmanTree(const uint8_t* depth,
                      uint32_t length,
                      std::vector<uint8_t> *tree,
                      std::vector<uint8_t> *extra_bits_data) {
  // Throw away trailing zeros.
  int kept = length;
  while (kept > 0 && depth[kept - 1] == 0) {
    --kept;
  }

  // First gather statistics on if it is a good idea to do rle.
  // Shorter codes seem not to benefit from rle, so it is only considered
  // when the untrimmed length is above 50.
  bool use_rle_for_non_zero = false;
  bool use_rle_for_zero = false;
  if (length > 50) {
    DecideOverRleUse(depth, kept,
                     &use_rle_for_non_zero, &use_rle_for_zero);
  }

  // Actual rle coding.
  const uint32_t end = kept;
  int last_written = 8;  // Initial "previous value" for the first symbol.
  uint32_t pos = 0;
  while (pos < end) {
    const int value = depth[pos];
    const bool rle_allowed =
        (value == 0) ? use_rle_for_zero : use_rle_for_non_zero;
    int reps = 1;
    if (rle_allowed) {
      uint32_t next = pos + 1;
      while (next < end && depth[next] == value) {
        ++reps;
        ++next;
      }
    }
    if (value == 0) {
      WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data);
    } else {
      WriteHuffmanTreeRepetitions(last_written,
                                  value, reps, tree, extra_bits_data);
      last_written = value;
    }
    pos += reps;
  }
}
namespace {
// Returns the lowest num_bits bits of `bits` in reversed order.
// The value is reversed one nibble at a time through a lookup table and the
// surplus bits of the last nibble are shifted out at the end.
uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t reversed = kLut[bits & 0xf];
  int handled = 4;
  while (handled < num_bits) {
    bits >>= 4;
    reversed = (reversed << 4) | kLut[bits & 0xf];
    handled += 4;
  }
  // Whole nibbles were processed; drop the bits beyond num_bits.
  const int surplus = -num_bits & 0x3;
  return reversed >> surplus;
}
} // namespace
// Assigns a code to every symbol from its bit depth: for each depth the codes
// are handed out in increasing symbol order and stored bit-reversed in
// bits[i]. Entries of bits whose depth is 0 are not written.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
  // In Brotli, all bit depths are [1..15]
  // 0 bit depth means that the symbol does not exist.
  const int kMaxBits = 16;  // 0..15 are values for bits

  // Number of symbols per bit depth; depth 0 is deliberately not counted.
  uint16_t bl_count[kMaxBits] = { 0 };
  for (int i = 0; i < len; ++i) {
    if (depth[i] != 0) {
      ++bl_count[depth[i]];
    }
  }

  // First code of every bit depth.
  uint16_t next_code[kMaxBits];
  next_code[0] = 0;
  int code = 0;
  for (int width = 1; width < kMaxBits; ++width) {
    code = (code + bl_count[width - 1]) << 1;
    next_code[width] = code;
  }

  for (int i = 0; i < len; ++i) {
    const uint8_t width = depth[i];
    if (width == 0) {
      continue;
    }
    bits[i] = ReverseBits(width, next_code[width]);
    ++next_code[width];
  }
}
} // namespace brotli

View File

@ -0,0 +1,88 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entropy encoding (Huffman) utilities.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <stdint.h>
#include <string.h>
#include <vector>
#include "./histogram.h"
#include "./prefix.h"
namespace brotli {
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
const int tree_limit,
uint8_t *depth);
// Change the population counts in such a way that the subsequent
// Huffman tree compression, especially its rle-part, will be more
// likely to compress this data efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
int OptimizeHuffmanCountsForRle(int length, int* counts);
// Write a huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree
void WriteHuffmanTree(const uint8_t* depth,
uint32_t num,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
// Plain data holder describing a prefix (Huffman) code over an alphabet of
// kSize symbols. All members are public and there is no constructor, so the
// contents are uninitialized unless the owner initializes them.
template<int kSize>
struct EntropyCode {
// How many bits for symbol.
uint8_t depth_[kSize];
// Actual bits used to represent the symbol.
uint16_t bits_[kSize];
// How many non-zero depth.
int count_;
// First four symbols with non-zero depth.
int symbols_[4];
};
static const int kCodeLengthCodes = 18;
// Literal entropy code.
typedef EntropyCode<256> EntropyCodeLiteral;
// Prefix entropy codes.
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
typedef EntropyCode<272> EntropyCodeContextMap;
// Block type entropy code, 256 block types + 2 special symbols.
typedef EntropyCode<258> EntropyCodeBlockType;
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_

View File

@ -0,0 +1,179 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Utilities for fast computation of logarithms.
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include <stdint.h>
namespace brotli {
// Computes floor(log2(n)), i.e. the position of the highest set bit of n.
// By convention the result is -1 when (and only when) n == 0.
inline int Log2Floor(uint32_t n) {
  // Zero has no set bit; handle it up front for both implementations.
  if (n == 0) {
    return -1;
  }
#if defined(__clang__) ||                      \
    (defined(__GNUC__) &&                      \
     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
  // 31 ^ clz(n) equals 31 - clz(n) because clz(n) is in [0, 31].
  return 31 ^ __builtin_clz(n);
#else
  // Portable fallback: binary search for the top bit, halving the step
  // (16, 8, 4, 2, 1) and keeping the upper part whenever it is non-empty.
  uint32_t remaining = n;
  int result = 0;
  for (int step = 16; step > 0; step >>= 1) {
    const uint32_t upper = remaining >> step;
    if (upper != 0) {
      remaining = upper;
      result += step;
    }
  }
  assert(remaining == 1);
  return result;
#endif
}
// Computes floor(log2(n)) for a non-zero n, skipping the zero check that
// Log2Floor performs. Callers must not pass 0: the builtin used on
// GCC-compatible compilers has an undefined result for a zero argument.
static inline int Log2FloorNonZero(uint32_t n) {
#ifdef __GNUC__
  const int leading_zeros = __builtin_clz(n);
  // XOR with 31 is the same as 31 - leading_zeros for values in [0, 31].
  return 31 ^ leading_zeros;
#else
  // Count how many times n can be shifted right before it becomes zero,
  // not counting the shift that removes the top bit's last position.
  unsigned int highest_bit = 0;
  for (n >>= 1; n != 0; n >>= 1) {
    ++highest_bit;
  }
  return highest_bit;
#endif
}
// Computes ceiling(log2(n)) for a positive integer n.
// Returns -1 iff n == 0 (the value Log2Floor yields for zero).
inline int Log2Ceiling(uint32_t n) {
  const int lower = Log2Floor(n);
  // n has at most one bit set (zero or an exact power of two) exactly when
  // clearing its lowest set bit leaves nothing; then floor == ceiling.
  const bool at_most_one_bit = (n & (n - 1)) == 0;
  return at_most_one_bit ? lower : lower + 1;
}
// A lookup table for small values of log2(int) to be used in entropy
// computation. Entry i holds log2(i) for i in [1, 255]; entry 0 is 0.0 so
// that FastLog2(0) == 0.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
  7.9943534368588578f
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
//
// Values covered by kLog2Table are answered from the table (with float
// precision); everything else is computed with the math library.
//
// A negative v is never looked up in the table: it is forwarded to
// log2()/log(), so the result is whatever the math library yields for a
// negative argument (NaN on IEEE-754 platforms) instead of an out-of-bounds
// table read.
static inline double FastLog2(int v) {
  const unsigned int kTableSize =
      static_cast<unsigned int>(sizeof(kLog2Table) / sizeof(kLog2Table[0]));
  // The unsigned comparison rejects negative values as well as large ones
  // with a single test: a negative int converts to a huge unsigned value.
  if (static_cast<unsigned int>(v) < kTableSize) {
    return kLog2Table[v];
  }
#if defined(_MSC_VER) && _MSC_VER <= 1600
  // Visual Studio 2010 does not have the log2() function defined, so we use
  // log() and a multiplication instead.
  // 1 / ln(2), written as a double literal so no precision is lost.
  static const double kLog2Inv = 1.4426950408889634;
  return log(static_cast<double>(v)) * kLog2Inv;
#else
  return log2(static_cast<double>(v));
#endif
}
} // namespace brotli
#endif // BROTLI_ENC_FAST_LOG_H_

View File

@ -0,0 +1,87 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find maximal matching prefixes of strings.
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include <stdint.h>
#include <stddef.h>
#include "./port.h"
namespace brotli {
// FindMatchLengthWithLimit(s1, s2, limit) returns the number of leading bytes
// that are equal in s1 and s2, examining at most `limit` bytes. Both pointers
// must be readable for `limit` bytes. The result is returned as an int.
//
// Separate implementation for little-endian 64-bit targets, for speed.
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
                                           const uint8_t* s2,
                                           size_t limit) {
  int matched = 0;
  // Compare whole 64-bit words first.
  size_t limit2 = (limit >> 3) + 1;  // + 1 is for pre-decrement in while
  while (PREDICT_TRUE(--limit2)) {
    if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
                      BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
      s2 += 8;
      matched += 8;
    } else {
      // The words differ: on a little-endian target the lowest set bit of
      // the XOR lies in the first mismatching byte, so the count of trailing
      // zero bits divided by 8 is the number of additional equal bytes.
      uint64_t x =
          BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
      int matching_bits = __builtin_ctzll(x);
      matched += matching_bits >> 3;
      return matched;
    }
  }
  // Fewer than 8 bytes remain: finish byte by byte.
  limit = (limit & 7) + 1;  // + 1 is for pre-decrement in while
  while (--limit) {
    if (PREDICT_TRUE(s1[matched] == *s2)) {
      ++s2;
      ++matched;
    } else {
      return matched;
    }
  }
  return matched;
}
#else
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
                                           const uint8_t* s2,
                                           size_t limit) {
  int matched = 0;
  const uint8_t* const s2_limit = s2 + limit;
  const uint8_t* s2_ptr = s2;
  // Find out how long the match is. We loop over the data 32 bits at a
  // time until we find a 32-bit block that doesn't match (or fewer than
  // four bytes remain); the rest is then compared byte by byte.
  //
  // The remaining length is computed as a pointer difference rather than by
  // forming `s2_limit - 4`, which would point before the start of the buffer
  // (undefined pointer arithmetic) whenever limit < 4.
  while (s2_limit - s2_ptr >= 4 &&
         BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
         BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
    s2_ptr += 4;
    matched += 4;
  }
  while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
    ++s2_ptr;
    ++matched;
  }
  return matched;
}
#endif
} // namespace brotli
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_

View File

@ -0,0 +1,634 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <cstdlib>
#include <memory>
#include <string>
#include "./dictionary_hash.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./prefix.h"
#include "./static_dict.h"
#include "./transform.h"
namespace brotli {
// Tables describing the "last distance" candidates tried by
// HashLongestMatch::FindLongestMatch. Candidate i uses the backward distance
//   distance_cache[kDistanceCacheIndex[i]] + kDistanceCacheOffset[i]
// i.e. entries 0..3 are the four cached distances themselves, entries 4..9
// are the most recent distance adjusted by -1, +1, -2, +2, -3, +3, and
// entries 10..15 are the second most recent distance with the same offsets.
static const int kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
// Multiplicative hash of the 32-bit word loaded from `data`.
//
// kShiftBits is the number of result bits: the return value is the top
// kShiftBits bits of the 32-bit product. When kMinLength is 3 or less, only
// the low 24 bits of the loaded word take part in the hash (the first three
// bytes in memory on a little-endian target); otherwise all 32 bits do.
// Four bytes are read from `data` in either case.
template<int kShiftBits, int kMinLength>
inline uint32_t Hash(const uint8_t *data) {
  const uint32_t word = BROTLI_UNALIGNED_LOAD32(data);
  // Drop the fourth byte when matches as short as 2 or 3 bytes must hash
  // to the same bucket.
  const uint32_t hashed_bits = (kMinLength <= 3) ? (word & 0xffffff) : word;
  const uint32_t product = hashed_bits * kHashMul32;
  // The higher bits contain more mixture from the multiplication,
  // so the result is taken from there.
  return product >> (32 - kShiftBits);
}
// Scores a candidate backward reference; a higher score is a better match.
//
// The longest match is not always the best one: a copy from further away
// usually needs more bits for its distance. This is approximated by charging
// 1.20 bits per unit of floor(log2(distance)), while every copied byte is
// credited with the average literal cost it saves:
//
//   score = copy_length * average_cost - 1.20 * Log2Floor(distance)
//
// The score lets callers discard a slightly longer match whose distance is
// more expensive to encode than the literals it saves. Distances that can be
// expressed through the recent-distance cache are scored by
// BackwardReferenceScoreUsingLastDistance instead.
inline double BackwardReferenceScore(double average_cost,
                                     int copy_length,
                                     int backward_reference_offset) {
  const double saved_literal_bits = copy_length * average_cost;
  const double distance_bits = 1.20 * Log2Floor(backward_reference_offset);
  return (saved_literal_bits - distance_bits);
}
// Scores a backward reference whose distance is expressed through a short
// code, using a fixed heuristic bit cost per short code instead of a
// logarithm of the distance:
//
//   score = average_cost * copy_length - cost[distance_short_code]
//
// distance_short_code must be in [0, 15]; it indexes a 16-entry table and is
// not range checked.
inline double BackwardReferenceScoreUsingLastDistance(double average_cost,
                                                      int copy_length,
                                                      int distance_short_code) {
  // Heuristic cost, in bits, charged for each of the 16 short codes.
  static const double kShortCodeCost[16] = {
    -0.6, 0.95, 1.17, 1.27,
    0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
    1.05, 1.05, 1.15, 1.15, 1.25, 1.25
  };
  const double saved_literal_bits = average_cost * copy_length;
  const double distance_bits = kShortCodeCost[distance_short_code];
  return (saved_literal_bits - distance_bits);
}
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize). Starting from the
// given index, kBucketSweep buckets are used to store values of a key.
//
// Template parameters:
//   kBucketBits  - the table has 1 << kBucketBits hash keys.
//   kBucketSweep - number of consecutive slots, starting at the slot of the
//                  key, in which a position for that key may be stored.
template <int kBucketBits, int kBucketSweep>
class HashLongestMatchQuickly {
public:
// Constructs the table in its reset state.
HashLongestMatchQuickly() {
Reset();
}
// Sets every slot to 0 and clears the static dictionary statistics.
void Reset() {
// It is not strictly necessary to fill this buffer here, but
// not filling will make the results of the compression stochastic
// (but correct). This is because random data would cause the
// system to find accidentally good backward references here and there.
std::fill(&buckets_[0],
&buckets_[sizeof(buckets_) / sizeof(buckets_[0])],
0);
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
// Look at 4 bytes at data.
// Compute a hash from these, and store the position ix in one of the slots
// buckets_[key .. key + kBucketSweep - 1]. The slot within that range is
// chosen from ix itself ((ix >> 3) % kBucketSweep), so it changes every
// 8 positions. Any previous content of the slot is overwritten.
inline void Store(const uint8_t *data, const int ix) {
const uint32_t key = Hash<kBucketBits, 4>(data);
// Wiggle the value with the bucket sweep range.
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
buckets_[key + off] = ix;
}
// Store hashes for a range of data: for every p in [0, len) the bytes at
// data[p & mask] are hashed and recorded under position startix + p.
// NOTE(review): p is an int compared against the size_t len.
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
for (int p = 0; p < len; ++p) {
Store(&data[p & mask], startix + p);
}
}
// Always false: this hasher has no SetStaticDictionary(). Note that
// FindLongestMatch still consults the built-in kStaticDictionaryHash when
// kBucketSweep != 1.
bool HasStaticDictionary() const { return false; }
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
// up to the length of max_length.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out (and the same value
// into best_len_code_out).
// Writes the backward distance of the best match into best_distance_out;
// for a static dictionary word this is max_backward + dist + 1.
// Writes the score of the best match into best_score_out.
//
// On entry *best_len_out and *best_score_out hold the length and score to
// beat. Returns true iff a match was written to the output parameters.
// literal_cost and literal_cost_mask are not used by this hasher.
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
const size_t ring_buffer_mask,
const float* __restrict literal_cost,
const size_t literal_cost_mask,
const double average_cost,
const int* __restrict distance_cache,
const uint32_t cur_ix,
const uint32_t max_length,
const uint32_t max_backward,
int * __restrict best_len_out,
int * __restrict best_len_code_out,
int * __restrict best_distance_out,
double* __restrict best_score_out) {
const int best_len_in = *best_len_out;
const int cur_ix_masked = cur_ix & ring_buffer_mask;
// A candidate can only beat the current best if it also matches at offset
// best_len, so that single byte is compared before the full match length
// is computed.
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
double best_score = *best_score_out;
int best_len = best_len_in;
// Step 1: try the most recently used distance.
int backward = distance_cache[0];
size_t prev_ix = cur_ix - backward;
bool match_found = false;
if (prev_ix < cur_ix) {
prev_ix &= ring_buffer_mask;
if (compare_char == ring_buffer[prev_ix + best_len]) {
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
// Accepted without comparing against the incoming score.
best_score = BackwardReferenceScoreUsingLastDistance(average_cost,
len, 0);
best_len = len;
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
compare_char = ring_buffer[cur_ix_masked + best_len];
if (kBucketSweep == 1) {
return true;
} else {
match_found = true;
}
}
}
}
// Step 2: look up the positions stored for the hash of the current bytes.
const uint32_t key = Hash<kBucketBits, 4>(&ring_buffer[cur_ix_masked]);
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
backward = cur_ix - prev_ix;
prev_ix &= ring_buffer_mask;
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return false;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
return false;
}
const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = BackwardReferenceScore(average_cost, len, backward);
return true;
} else {
return false;
}
} else {
// Sweep over the kBucketSweep slots of this key. The loop increment reads
// one slot past the last one examined; buckets_ is sized
// kBucketSize + kBucketSweep so that read stays inside the array.
uint32_t *bucket = buckets_ + key;
prev_ix = *bucket++;
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
const int backward = cur_ix - prev_ix;
prev_ix &= ring_buffer_mask;
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
continue;
}
const int len =
FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
const double score = BackwardReferenceScore(average_cost,
len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = score;
compare_char = ring_buffer[cur_ix_masked + best_len];
match_found = true;
}
}
}
// Step 3: nothing found so far; try the built-in static dictionary, but
// only while at least 1 in 128 of the previous lookups produced a match.
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
++num_dict_lookups_;
const uint32_t key = Hash<14, 4>(&ring_buffer[cur_ix_masked]) << 1;
const uint16_t v = kStaticDictionaryHash[key];
if (v > 0) {
// v packs the word length in its low 5 bits and the word index (within
// the words of that length) in the remaining bits.
const int len = v & 31;
const int dist = v >> 5;
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const int matchlen =
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen == len) {
// Dictionary words are addressed with distances beyond max_backward.
const size_t backward = max_backward + dist + 1;
const double score = BackwardReferenceScore(average_cost,
len, backward);
if (best_score < score) {
++num_dict_matches_;
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
return true;
}
}
}
}
}
return match_found;
}
}
private:
// Number of hash keys.
static const uint32_t kBucketSize = 1 << kBucketBits;
// Stored positions; kBucketSweep extra slots follow the last key so that a
// sweep starting at the highest key does not run off the array.
uint32_t buckets_[kBucketSize + kBucketSweep];
// Number of static dictionary lookups attempted / lookups that produced the
// returned match; used to throttle further dictionary lookups.
size_t num_dict_lookups_;
size_t num_dict_matches_;
};
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
// index positions of the given hash key in the compressed data.
//
// Template parameters:
//   kBucketBits              - there are 1 << kBucketBits hash keys.
//   kBlockBits               - each key remembers 1 << kBlockBits positions.
//   kMinLength               - passed to Hash(); also the shortest match
//                              length considered (2-byte matches only get
//                              special handling when kMinLength == 2).
//   kNumLastDistancesToCheck - how many entries of the kDistanceCacheIndex /
//                              kDistanceCacheOffset tables are tried first.
//   kUseCostModel            - whether per-literal costs adjust the scores.
//   kUseDictionary           - whether the StaticDictionary set through
//                              SetStaticDictionary() is consulted.
template <int kBucketBits,
int kBlockBits,
int kMinLength,
int kNumLastDistancesToCheck,
bool kUseCostModel,
bool kUseDictionary>
class HashLongestMatch {
public:
// Constructs an empty hasher with no static dictionary attached.
HashLongestMatch() : static_dict_(NULL) {
Reset();
}
// Forgets all stored positions by zeroing the per-key counters (the
// position arrays themselves are not cleared) and resets the dictionary
// statistics. The attached static dictionary is kept.
void Reset() {
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
// Attaches the dictionary used when kUseDictionary is true. The pointer is
// stored, not copied.
void SetStaticDictionary(const StaticDictionary *dict) {
static_dict_ = dict;
}
// Returns true iff a dictionary has been attached.
bool HasStaticDictionary() const {
return static_dict_ != NULL;
}
// Look at the bytes at data (Hash loads a 32-bit word and, for
// kMinLength <= 3, only uses its low 24 bits).
// Compute a hash from these, and store the value of ix at that position,
// overwriting the oldest entry of the key once its block is full.
inline void Store(const uint8_t *data, const int ix) {
const uint32_t key = Hash<kBucketBits, kMinLength>(data);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
}
// Store hashes for a range of data: for every p in [0, len) the bytes at
// data[p & mask] are hashed and recorded under position startix + p.
// NOTE(review): p is an int compared against the size_t len.
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
for (int p = 0; p < len; ++p) {
Store(&data[p & mask], startix + p);
}
}
// Find a longest backward match of &data[cur_ix] up to the length of
// max_length.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match into best_distance_out;
// dictionary matches use distances larger than max_backward.
// Write the score of the best match into best_score_out.
// Writes the length code into best_len_code_out (equal to the length except
// for StaticDictionary matches, where the dictionary supplies it).
//
// On entry *best_len_out and *best_score_out hold the length and score to
// beat; *best_len_out and *best_len_code_out are reset to 0 before the
// search. Returns true iff a match was found.
bool FindLongestMatch(const uint8_t * __restrict data,
const size_t ring_buffer_mask,
const float * __restrict literal_cost,
const size_t literal_cost_mask,
const double average_cost,
const int* __restrict distance_cache,
const uint32_t cur_ix,
uint32_t max_length,
const uint32_t max_backward,
int * __restrict best_len_out,
int * __restrict best_len_code_out,
int * __restrict best_distance_out,
double * __restrict best_score_out) {
*best_len_code_out = 0;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
// Score corrections for matches of length >= 4, 3 and 2: how much the
// first literals cost compared to the average literal, plus a fixed
// adjustment for the short lengths. They stay 0 without the cost model or
// when no literal costs are supplied.
double start_cost_diff4 = 0.0;
double start_cost_diff3 = 0.0;
double start_cost_diff2 = 0.0;
if (kUseCostModel) {
start_cost_diff4 = literal_cost == NULL ? 0 :
literal_cost[cur_ix & literal_cost_mask] +
literal_cost[(cur_ix + 1) & literal_cost_mask] +
literal_cost[(cur_ix + 2) & literal_cost_mask] +
literal_cost[(cur_ix + 3) & literal_cost_mask] -
4 * average_cost;
start_cost_diff3 = literal_cost == NULL ? 0 :
literal_cost[cur_ix & literal_cost_mask] +
literal_cost[(cur_ix + 1) & literal_cost_mask] +
literal_cost[(cur_ix + 2) & literal_cost_mask] -
3 * average_cost + 0.3;
start_cost_diff2 = literal_cost == NULL ? 0 :
literal_cost[cur_ix & literal_cost_mask] +
literal_cost[(cur_ix + 1) & literal_cost_mask] -
2 * average_cost + 1.2;
}
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = *best_score_out;
int best_len = *best_len_out;
*best_len_out = 0;
// Try last distance first.
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
const int idx = kDistanceCacheIndex[i];
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
size_t prev_ix = cur_ix - backward;
// Skip distances that do not point strictly before cur_ix.
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
prev_ix &= ring_buffer_mask;
// A candidate can only beat the current best if it also matches at offset
// best_len; candidates for which that byte would lie beyond the ring
// buffer mask are skipped as well.
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len >= std::max(kMinLength, 3) ||
(kMinLength == 2 && len == 2 && i < 2)) {
// Comparing for >= 2 does not change the semantics, but just saves for
// a few unnecessary binary logarithms in backward reference score,
// since we are not interested in such short matches.
double score = BackwardReferenceScoreUsingLastDistance(
average_cost, len, i);
if (kUseCostModel) {
switch (len) {
case 2: score += start_cost_diff2; break;
case 3: score += start_cost_diff3; break;
default: score += start_cost_diff4;
}
}
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
// With 2-byte matches enabled, scan the positions directly before cur_ix
// (at most 63 of them) for a 2-byte match.
if (kMinLength == 2) {
int stop = int(cur_ix) - 64;
if (stop < 0) { stop = 0; }
start_cost_diff2 -= 1.0;
for (int i = cur_ix - 1; i > stop; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked] != data[prev_ix] ||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
int len = 2;
const double score =
average_cost * 2 - 2.3 * Log2Floor(backward) + start_cost_diff2;
// NOTE(review): unlike the other search stages, this one does not write
// *best_score_out when it records a new best match -- confirm whether
// that is intended.
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
match_found = true;
}
}
}
// Walk the positions remembered for this hash key, newest first.
const uint32_t key = Hash<kBucketBits, kMinLength>(&data[cur_ix_masked]);
const int * __restrict const bucket = &buckets_[key][0];
// Only the newest kBlockSize entries are still present in the block.
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
int prev_ix = bucket[i & kBlockMask];
if (prev_ix >= 0) {
const size_t backward = cur_ix - prev_ix;
// Older entries of the key are not examined once one is too far away.
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len >= std::max(kMinLength, 3)) {
// Comparing for >= 3 does not change the semantics, but just saves
// for a few unnecessary binary logarithms in backward reference
// score, since we are not interested in such short matches.
double score = BackwardReferenceScore(average_cost,
len, backward);
if (kUseCostModel) {
score += (len >= 4) ? start_cost_diff4 : start_cost_diff3;
}
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
}
// Nothing found so far: try the built-in static dictionary hash (two
// adjacent entries per key), but only while at least 1 in 128 of the
// previous lookups produced a match.
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
uint32_t key = Hash<14, 4>(&data[cur_ix_masked]) << 1;
for (int k = 0; k < 2; ++k, ++key) {
++num_dict_lookups_;
const uint16_t v = kStaticDictionaryHash[key];
if (v > 0) {
// v packs the word length in its low 5 bits and the word index (within
// the words of that length) in the remaining bits.
const int len = v & 31;
const int dist = v >> 5;
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const int matchlen =
FindMatchLengthWithLimit(&data[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen == len) {
// Dictionary words are addressed with distances beyond max_backward.
const size_t backward = max_backward + dist + 1;
double score = BackwardReferenceScore(average_cost,
len, backward);
if (kUseCostModel) {
score += start_cost_diff4;
}
if (best_score < score) {
++num_dict_matches_;
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
break;
}
}
}
}
}
}
// Finally, consult the attached StaticDictionary, trying candidate lengths
// from the longest possible down to best_len + 1 (never below 4).
if (kUseDictionary && static_dict_ != NULL) {
// We decide based on first 4 bytes how many bytes to test for.
uint32_t prefix = BROTLI_UNALIGNED_LOAD32(&data[cur_ix_masked]);
int maxlen = static_dict_->GetLength(prefix);
for (int len = std::min<size_t>(maxlen, max_length);
len > best_len && len >= 4; --len) {
std::string snippet((const char *)&data[cur_ix_masked], len);
int copy_len_code;
int word_id;
if (static_dict_->Get(snippet, &copy_len_code, &word_id)) {
const size_t backward = max_backward + word_id + 1;
const double score = (BackwardReferenceScore(average_cost,
len, backward) +
start_cost_diff4);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = copy_len_code;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
}
return match_found;
}
private:
// Number of hash buckets.
static const uint32_t kBucketSize = 1 << kBucketBits;
// Only kBlockSize newest backward references are kept,
// and the older are forgotten.
static const uint32_t kBlockSize = 1 << kBlockBits;
// Mask for accessing entries in a block (in a ringbuffer manner).
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
// Number of entries in a particular bucket.
// This is a running 16-bit count of Store() calls for the key, not capped
// at kBlockSize.
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
int buckets_[kBucketSize][kBlockSize];
// Number of built-in dictionary lookups attempted / lookups that produced a
// new best match; used to throttle further dictionary lookups.
size_t num_dict_lookups_;
size_t num_dict_matches_;
// Optional dictionary set through SetStaticDictionary(); not owned.
const StaticDictionary *static_dict_;
};
// Owns the hasher instances used by the encoder, one slot per hasher type.
// A slot is empty until Init() is called with the matching type number.
struct Hashers {
  // Hasher configurations, numbered 1 through 9.
  using H1 = HashLongestMatchQuickly<16, 1>;
  using H2 = HashLongestMatchQuickly<17, 4>;
  using H3 = HashLongestMatch<14, 4, 4, 4, false, false>;
  using H4 = HashLongestMatch<14, 5, 4, 4, false, false>;
  using H5 = HashLongestMatch<15, 6, 4, 10, false, false>;
  using H6 = HashLongestMatch<15, 7, 4, 10, false, false>;
  using H7 = HashLongestMatch<15, 8, 4, 16, false, false>;
  using H8 = HashLongestMatch<15, 8, 4, 16, true, true>;
  using H9 = HashLongestMatch<15, 8, 2, 16, true, false>;

  // Allocates a fresh hasher of the requested type (1..9), replacing any
  // hasher previously held in that slot. Other values are ignored.
  void Init(int type) {
    if (type == 1) {
      hash_h1.reset(new H1);
    } else if (type == 2) {
      hash_h2.reset(new H2);
    } else if (type == 3) {
      hash_h3.reset(new H3);
    } else if (type == 4) {
      hash_h4.reset(new H4);
    } else if (type == 5) {
      hash_h5.reset(new H5);
    } else if (type == 6) {
      hash_h6.reset(new H6);
    } else if (type == 7) {
      hash_h7.reset(new H7);
    } else if (type == 8) {
      hash_h8.reset(new H8);
    } else if (type == 9) {
      hash_h9.reset(new H9);
    }
  }

  // Forwards the dictionary to the type-8 hasher, if that hasher has been
  // created; the other hashers never receive it.
  void SetStaticDictionary(const StaticDictionary *dict) {
    if (hash_h8) {
      hash_h8->SetStaticDictionary(dict);
    }
  }

  std::unique_ptr<H1> hash_h1;
  std::unique_ptr<H2> hash_h2;
  std::unique_ptr<H3> hash_h3;
  std::unique_ptr<H4> hash_h4;
  std::unique_ptr<H5> hash_h5;
  std::unique_ptr<H6> hash_h6;
  std::unique_ptr<H7> hash_h7;
  std::unique_ptr<H8> hash_h8;
  std::unique_ptr<H9> hash_h9;
};
} // namespace brotli
#endif // BROTLI_ENC_HASH_H_

View File

@ -0,0 +1,76 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Build per-context histograms of literals, commands and distance codes.
#include "./histogram.h"
#include <stdint.h>
#include <cmath>
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./prefix.h"
namespace brotli {
// Accumulates the literal, insert-and-copy and distance histograms for a
// sequence of commands.
//
// For every command, in order:
//   * cmd_prefix_ is added to the insert-and-copy histogram of the current
//     command block type;
//   * each of the insert_len_ literals, read from ringbuffer[pos & mask], is
//     added to the literal histogram indexed by
//     (literal block type << kLiteralContextBits) + Context(prev_byte,
//     prev_byte2, context_modes[literal block type]);
//   * if the command copies at least one byte and cmd_prefix_ >= 128,
//     dist_prefix_ is added to the distance histogram indexed by
//     (distance block type << kDistanceContextBits) + cmd.DistanceContext().
//
// start_pos is the ring-buffer position of the first literal; prev_byte and
// prev_byte2 are the two bytes preceding it. The output vectors are indexed
// directly and never resized here, so the caller must size them beforehand.
void BuildHistograms(
    const Command* cmds,
    const size_t num_commands,
    const BlockSplit& literal_split,
    const BlockSplit& insert_and_copy_split,
    const BlockSplit& dist_split,
    const uint8_t* ringbuffer,
    size_t start_pos,
    size_t mask,
    uint8_t prev_byte,
    uint8_t prev_byte2,
    const std::vector<int>& context_modes,
    std::vector<HistogramLiteral>* literal_histograms,
    std::vector<HistogramCommand>* insert_and_copy_histograms,
    std::vector<HistogramDistance>* copy_dist_histograms) {
  size_t pos = start_pos;
  BlockSplitIterator literal_it(literal_split);
  BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
  BlockSplitIterator dist_it(dist_split);
  // The index has the same type as num_commands so the comparison is not a
  // mixed signed/unsigned one.
  for (size_t i = 0; i < num_commands; ++i) {
    const Command &cmd = cmds[i];
    insert_and_copy_it.Next();
    (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
        cmd.cmd_prefix_);
    for (int j = 0; j < cmd.insert_len_; ++j) {
      literal_it.Next();
      int context = (literal_it.type_ << kLiteralContextBits) +
          Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
      (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
      prev_byte2 = prev_byte;
      prev_byte = ringbuffer[pos & mask];
      ++pos;
    }
    pos += cmd.copy_len_;
    if (cmd.copy_len_ > 0) {
      // After a copy the context bytes are the last two copied bytes, which
      // are read back from the ring buffer.
      prev_byte2 = ringbuffer[(pos - 2) & mask];
      prev_byte = ringbuffer[(pos - 1) & mask];
      if (cmd.cmd_prefix_ >= 128) {
        dist_it.Next();
        int context = (dist_it.type_ << kDistanceContextBits) +
            cmd.DistanceContext();
        (*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
      }
    }
  }
}
} // namespace brotli

View File

@ -0,0 +1,107 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Models the histograms of literals, commands and distance codes.
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <stdint.h>
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
namespace brotli {
// Forward declaration; the type is defined with the `struct` class-key, so the
// same key is used here to avoid mismatched-tag diagnostics.
struct BlockSplit;
// A simple container for histograms of data in blocks.
//
// kDataSize is the alphabet size: data_[s] counts how often symbol s was added
// and total_count_ is the sum of all counts. Symbols are used as array indices
// without range checks, so callers must keep them within [0, kDataSize).
template<int kDataSize>
struct Histogram {
  // Starts with all counts at zero. bit_cost_ gets a defined value so that
  // copying a freshly constructed histogram never reads an indeterminate
  // double; code that uses bit_cost_ is still expected to assign it first.
  Histogram() : bit_cost_(0.0) {
    Clear();
  }
  // Resets every count and the total to zero. bit_cost_ is left untouched.
  void Clear() {
    memset(data_, 0, sizeof(data_));
    total_count_ = 0;
  }
  // Counts one occurrence of symbol |val|.
  void Add(int val) {
    ++data_[val];
    ++total_count_;
  }
  // Removes one previously counted occurrence of symbol |val|.
  void Remove(int val) {
    --data_[val];
    --total_count_;
  }
  // Counts the |n| symbols stored at p[0..n).
  template<typename DataType>
  void Add(const DataType *p, size_t n) {
    total_count_ += n;
    for (size_t i = 0; i < n; ++i) {
      ++data_[p[i]];
    }
  }
  // Adds the counts of |v| to this histogram, symbol by symbol.
  void AddHistogram(const Histogram& v) {
    total_count_ += v.total_count_;
    for (int i = 0; i < kDataSize; ++i) {
      data_[i] += v.data_[i];
    }
  }
  // Returns total_count_ * FastLog2(total_count_) minus the sum over all
  // symbols of data_[i] * FastLog2(data_[i]).
  // NOTE(review): presumably the entropy-coded size in bits, which assumes
  // FastLog2 is a base-2 logarithm that yields 0 for 0 -- confirm in
  // fast_log.h.
  double EntropyBitCost() const {
    double retval = total_count_ * FastLog2(total_count_);
    for (int i = 0; i < kDataSize; ++i) {
      retval -= data_[i] * FastLog2(data_[i]);
    }
    return retval;
  }
  int data_[kDataSize];
  int total_count_;
  // Cached cost; not maintained by any member function of this struct.
  double bit_cost_;
};
// Literal histogram.
typedef Histogram<256> HistogramLiteral;
// Prefix histograms.
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
typedef Histogram<272> HistogramContextMap;
// Block type histogram, 256 block types + 2 special symbols.
typedef Histogram<258> HistogramBlockType;
// A literal histogram index is (block type << kLiteralContextBits) + context,
// so each literal block type owns 1 << kLiteralContextBits histograms.
static const int kLiteralContextBits = 6;
// A distance histogram index is (block type << kDistanceContextBits) + context,
// so each distance block type owns 1 << kDistanceContextBits histograms.
static const int kDistanceContextBits = 2;
// Adds the symbols of |cmds| (num_commands entries) to the output histograms.
//   literal_split, insert_and_copy_split, dist_split: block splits that supply
//     the block type of each literal, command and distance respectively.
//   ringbuffer, pos, mask: literals are read from ringbuffer[p & mask],
//     starting at position |pos|.
//   prev_byte, prev_byte2: the two bytes preceding |pos|, used for the first
//     literal context.
//   context_modes: context mode per literal block type.
//   literal_histograms, insert_and_copy_histograms, copy_dist_histograms:
//     outputs; they are indexed but not resized, so they must be pre-sized.
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<int>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);
} // namespace brotli
#endif // BROTLI_ENC_HISTOGRAM_H_

View File

@ -0,0 +1,172 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Literal cost model to allow backward reference replacement to be efficient.
#include "./literal_cost.h"
#include <math.h>
#include <stdint.h>
#include <algorithm>
#include "./fast_log.h"
namespace brotli {
// Returns the UTF-8 position class of the byte that follows |c|, where |last|
// is the byte that preceded |c|:
//   0 - the next byte starts a new character ('Byte 1'),
//   1 - the next byte is 'Byte 2' of a multi-byte sequence,
//   2 - the next byte is 'Byte 3' of a multi-byte sequence.
// The result never exceeds |clamp|.
static int UTF8Position(int last, int c, int clamp) {
  // ASCII byte: the next one is the 'Byte 1' again.
  if (c < 128) {
    return 0;
  }
  // Lead byte: the next one is the 'Byte 2' of utf-8 encoding.
  if (c >= 192) {
    return std::min(1, clamp);
  }
  // |c| is a continuation byte, so the previous byte decides whether the
  // sequence is complete (two or three byte coding) or continues with the
  // 'Byte 3' of utf-8 encoding.
  return (last < 0xe0) ? 0 : std::min(2, clamp);
}
// Chooses how many UTF-8 position classes the literal cost model should keep
// separate statistics for, based on the |len| bytes at data[(pos + i) & mask].
// Returns 0 when fewer than 25 bytes fall into position classes 1 and 2
// combined, and 1 otherwise.
static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
                                     const uint8_t *data) {
  int counts[3] = { 0 };
  int max_utf8 = 1;  // should be 2, but 1 compresses better.
  int last_c = 0;
  // The index has the same type as |len| so the comparison is not a mixed
  // signed/unsigned one.
  for (size_t i = 0; i < len; ++i) {
    const int c = data[(pos + i) & mask];
    ++counts[UTF8Position(last_c, c, 2)];
    last_c = c;
  }
  // Currently a no-op because max_utf8 already starts at 1; it keeps the
  // threshold in place in case the initial value is raised to 2.
  if (counts[2] < 500) {
    max_utf8 = 1;
  }
  if (counts[1] + counts[2] < 25) {
    max_utf8 = 0;
  }
  return max_utf8;
}
// For each i in [0, len), stores in cost[(pos + i) & cost_mask] an estimated
// bit cost for the literal data[(pos + i) & mask]. The estimate comes from
// byte histograms over a sliding window around i, kept separately for each
// UTF-8 position class (see UTF8Position), with up to max_utf8 + 1 classes.
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
size_t cost_mask, const uint8_t *data,
float *cost) {
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
// histogram[k][b]: occurrences of byte b in position class k inside the
// window; in_window_utf8[k]: total number of window bytes in class k.
int histogram[3][256] = { { 0 } };
// The window reaches window_half bytes ahead of and (almost) as many behind
// the current byte.
int window_half = 495;
int in_window = std::min(static_cast<size_t>(window_half), len);
int in_window_utf8[3] = { 0 };
// Bootstrap histograms.
// The first in_window bytes are counted; each byte's class is derived from
// the bytes before it, with 0 standing in for bytes before the start.
int last_c = 0;
int utf8_pos = 0;
for (int i = 0; i < in_window; ++i) {
int c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
}
// Compute bit costs with sliding window.
// Note: the locals declared inside this loop (c, last_c, utf8_pos) shadow
// the bootstrap variables of the same names above.
for (int i = 0; i < len; ++i) {
if (i - window_half >= 0) {
// Remove a byte in the past.
// The class of the byte leaving the window is recomputed from its two
// predecessors (0 when they lie before the start of the data).
int c = (i - window_half - 1) < 0 ?
0 : data[(pos + i - window_half - 1) & mask];
int last_c = (i - window_half - 2) < 0 ?
0 : data[(pos + i - window_half - 2) & mask];
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < len) {
// Add a byte in the future.
// Its class is likewise derived from the two bytes preceding it.
int c = (i + window_half - 1) < 0 ?
0 : data[(pos + i + window_half - 1) & mask];
int last_c = (i + window_half - 2) < 0 ?
0 : data[(pos + i + window_half - 2) & mask];
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
// Class of the current byte, from its two predecessors.
int c = i < 1 ? 0 : data[(pos + i - 1) & mask];
int last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
int utf8_pos = UTF8Position(last_c, c, max_utf8);
int masked_pos = (pos + i) & mask;
int histo = histogram[utf8_pos][data[masked_pos]];
// Never pass a zero count to FastLog2.
if (histo == 0) {
histo = 1;
}
// Cost is log(class population) - log(count of this byte in the class),
// plus a small constant.
float lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
// Costs below one bit are mapped to cost / 2 + 0.5, i.e. at least half a bit.
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
// Make the first bytes more expensive -- seems to help, not sure why.
// Perhaps because the entropy source is changing its properties
// rapidly in the beginning of the file, perhaps because the beginning
// of the data is a statistical "anomaly".
// The surcharge grows linearly from 0.35 at i == 0 towards 0.7 at i == 2000.
if (i < 2000) {
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
}
cost[(pos + i) & cost_mask] = lit_cost;
}
}
// For each i in [0, len), stores in cost[(pos + i) & cost_mask] an estimated
// bit cost for the literal data[(pos + i) & mask], derived from a single byte
// histogram over a sliding window that reaches 2000 bytes ahead of the
// current byte and almost as many behind it.
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
                                 size_t cost_mask, const uint8_t *data,
                                 float *cost) {
  const int kHalfWindow = 2000;
  int counts[256] = { 0 };
  // Number of bytes currently represented in |counts|.
  int window_population = std::min(static_cast<size_t>(kHalfWindow), len);

  // Seed the histogram with the leading bytes.
  for (int k = 0; k < window_population; ++k) {
    ++counts[data[(pos + k) & mask]];
  }

  // Slide the window over the data, one byte per step.
  for (int i = 0; i < len; ++i) {
    if (i >= kHalfWindow) {
      // Drop the byte that just left the window.
      --counts[data[(pos + i - kHalfWindow) & mask]];
      --window_population;
    }
    if (i + kHalfWindow < len) {
      // Take in the byte that just entered the window.
      ++counts[data[(pos + i + kHalfWindow) & mask]];
      ++window_population;
    }
    const int seen = counts[data[(pos + i) & mask]];
    // Never pass a zero count to FastLog2.
    const int histo = (seen == 0) ? 1 : seen;
    float lit_cost = FastLog2(window_population) - FastLog2(histo);
    lit_cost += 0.029;
    // Costs below one bit are mapped to cost / 2 + 0.5.
    if (lit_cost < 1.0) {
      lit_cost *= 0.5;
      lit_cost += 0.5;
    }
    cost[(pos + i) & cost_mask] = lit_cost;
  }
}
} // namespace brotli

View File

@ -0,0 +1,38 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Literal cost model to allow backward reference replacement to be efficient.
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include <stddef.h>
#include <stdint.h>
namespace brotli {
// Estimates how many bits the literals in the interval [pos, pos + len) in the
// ringbuffer (data, mask) will take entropy coded and writes these estimates
// to the ringbuffer (cost, cost_mask): the estimate for position p is stored
// in cost[p & cost_mask].
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
size_t cost_mask, const uint8_t *data,
float *cost);
// Same interface as EstimateBitCostsForLiterals; this variant is the one
// meant for UTF-8 data (per its name).
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
size_t cost_mask, const uint8_t *data,
float *cost);
} // namespace brotli
#endif // BROTLI_ENC_LITERAL_COST_H_

View File

@ -0,0 +1,319 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
#include "./metablock.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./histogram.h"
namespace brotli {
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
int literal_context_mode,
bool enable_context_modeling,
MetaBlockSplit* mb) {
SplitBlock(cmds, num_commands,
&ringbuffer[pos & mask],
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
std::vector<int> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
int num_literal_contexts =
mb->literal_split.num_types << kLiteralContextBits;
int num_distance_contexts =
mb->distance_split.num_types << kDistanceContextBits;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(cmds, num_commands,
mb->literal_split,
mb->command_split,
mb->distance_split,
ringbuffer,
pos,
mask,
prev_byte,
prev_byte2,
literal_context_modes,
&literal_histograms,
&mb->command_histograms,
&distance_histograms);
// Histogram ids need to fit in one byte.
static const int kMaxNumberOfHistograms = 256;
mb->literal_histograms = literal_histograms;
if (enable_context_modeling) {
ClusterHistograms(literal_histograms,
1 << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
} else {
ClusterHistogramsTrivial(literal_histograms,
1 << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
}
mb->distance_histograms = distance_histograms;
if (enable_context_modeling) {
ClusterHistograms(distance_histograms,
1 << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
} else {
ClusterHistogramsTrivial(distance_histograms,
1 << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
}
}
// Greedy block splitter for one block category (literal, command or distance).
//
// Symbols are fed one at a time through AddSymbol(). Whenever the current
// block reaches its target size, FinishBlock() decides whether the block gets
// a new block type, reuses the type of the second last block, or is merged
// into the last block. The caller must finish with FinishBlock(true), which
// trims the outputs to their final sizes.
template<typename HistogramType>
class BlockSplitter {
public:
// alphabet_size: number of histogram entries passed to BitsEntropy.
// min_block_size: minimum (and initial target) number of symbols per block.
// split_threshold: entropy difference above which a new block type is made.
// num_symbols: upper bound on the number of symbols that will be added; it
//   is used to pre-size the outputs.
// split, histograms: outputs, not owned; both are resized here.
BlockSplitter(int alphabet_size,
int min_block_size,
double split_threshold,
int num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
merge_last_count_(0) {
int max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
}
// Adds the next symbol to the current histogram. When the current histogram
// reaches the target size, decides on merging the block.
void AddSymbol(int symbol) {
(*histograms_)[curr_histogram_ix_].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
}
}
// Does either of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
// When is_final is true, the histogram, type and length vectors are also
// shrunk to the number of types and blocks actually produced.
void FinishBlock(bool is_final) {
// A block with fewer than min_block_size_ symbols is recorded as if it had
// min_block_size_ symbols.
// NOTE(review): this also applies when no symbol was added since the last
// block was finished -- confirm that is intended.
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = block_size_;
split_->types[0] = 0;
last_entropy_[0] =
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
last_entropy_[1] = last_entropy_[0];
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
} else if (block_size_ > 0) {
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
alphabet_size_);
// For j = 0 (last block type) and j = 1 (second last block type):
// diff[j] is the entropy increase caused by merging the current histogram
// into that type's histogram instead of keeping the two separate.
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
for (int j = 0; j < 2; ++j) {
int last_histogram_ix = last_histogram_ix_[j];
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
&combined_histo[j].data_[0], alphabet_size_);
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
}
if (split_->num_types < kMaxBlockTypes &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = block_size_;
split_->types[num_blocks_] = split_->num_types;
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types;
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = entropy;
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
// The second last type becomes the "last" one, and the scratch histogram
// at curr_histogram_ix_ is cleared for reuse.
split_->lengths[num_blocks_] = block_size_;
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = combined_entropy[1];
++num_blocks_;
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
// No new block is emitted: the last block simply grows.
split_->lengths[num_blocks_ - 1] += block_size_;
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
last_entropy_[0] = combined_entropy[0];
if (split_->num_types == 1) {
last_entropy_[1] = last_entropy_[0];
}
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
// After repeated merges, collect more symbols before deciding again.
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
}
}
}
if (is_final) {
(*histograms_).resize(split_->num_types);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
}
private:
// Upper limit on the number of block types this splitter creates.
static const int kMaxBlockTypes = 256;
// Alphabet size of particular block category.
const int alphabet_size_;
// We collect at least this many symbols for each block.
const int min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
// Number of blocks emitted so far.
int num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
int target_block_size_;
// The number of symbols in the current histogram.
int block_size_;
// Offset of the current histogram.
int curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
int last_histogram_ix_[2];
// Entropy of the previous two block types.
double last_entropy_[2];
// The number of times we merged the current block with the last one.
int merge_last_count_;
};
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
int num_literals = 0;
for (int i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
BlockSplitter<HistogramLiteral> lit_blocks(
256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (int i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
}
// Runs OptimizeHuffmanCountsForRle over the counts of every literal, command
// and distance histogram in |mb|. Literal histograms use 256 symbols, command
// histograms kNumCommandPrefixes, and distance histograms
// kNumDistanceShortCodes + num_direct_distance_codes +
// (48 << distance_postfix_bits).
void OptimizeHistograms(int num_direct_distance_codes,
                        int distance_postfix_bits,
                        MetaBlockSplit* mb) {
  // Indices are size_t to match vector::size() and avoid mixed
  // signed/unsigned comparisons.
  for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
    OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
  }
  for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
    OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
                                &mb->command_histograms[i].data_[0]);
  }
  const int num_distance_codes =
      kNumDistanceShortCodes + num_direct_distance_codes +
      (48 << distance_postfix_bits);
  for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
    OptimizeHuffmanCountsForRle(num_distance_codes,
                                &mb->distance_histograms[i].data_[0]);
  }
}
} // namespace brotli

View File

@ -0,0 +1,71 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <vector>
#include "./command.h"
#include "./histogram.h"
namespace brotli {
// Describes how a sequence of symbols is divided into consecutive blocks:
// block i has lengths[i] symbols and block type types[i]; num_types is the
// number of distinct block types in use. A new split starts out empty.
struct BlockSplit {
  BlockSplit()
      : num_types(0),
        types(),
        lengths() {}

  int num_types;
  std::vector<int> types;
  std::vector<int> lengths;
};
// Everything the meta-block builders produce for one meta-block: the block
// splits of the three symbol categories, the context maps for literals and
// distances, and the histograms of each category.
struct MetaBlockSplit {
// Block splits for literals, commands and distances.
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
// Context maps written by the histogram clustering step of BuildMetaBlock.
std::vector<int> literal_context_map;
std::vector<int> distance_context_map;
// Histograms per category.
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
};
// Builds the block splits, histograms and context maps of |mb| for the given
// commands.
//   ringbuffer, pos, mask: input data; the first literal is at
//     ringbuffer[pos & mask].
//   prev_byte, prev_byte2: the two bytes preceding |pos|.
//   cmds, num_commands: the commands of the meta-block.
//   literal_context_mode: context mode applied to every literal block type.
//   enable_context_modeling: when true the literal and distance histograms
//     are clustered with ClusterHistograms, otherwise with
//     ClusterHistogramsTrivial.
void BuildMetaBlock(const uint8_t* ringbuffer,
                    const size_t pos,
                    const size_t mask,
                    uint8_t prev_byte,
                    uint8_t prev_byte2,
                    const Command* cmds,
                    size_t num_commands,
                    int literal_context_mode,
                    bool enable_context_modeling,
                    MetaBlockSplit* mb);
// Builds the block splits and histograms of |mb| in one greedy pass over the
// |n_commands| entries of |commands|. Literals are read from
// ringbuffer[p & mask] starting at position |pos|.
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
// Applies OptimizeHuffmanCountsForRle to every literal, command and distance
// histogram of |mb|. The two integer parameters determine the number of
// distance codes considered.
void OptimizeHistograms(int num_direct_distance_codes,
int distance_postfix_bits,
MetaBlockSplit* mb);
} // namespace brotli
#endif // BROTLI_ENC_METABLOCK_H_

View File

@ -0,0 +1,143 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Macros for endianness, branch prediction and unaligned loads and stores.
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
// Pull in the platform's endianness header, selected by the OS_* macro that
// the build is expected to define. If none is defined, no header is included
// and detection falls back to the compiler-provided macros further down.
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>
#elif defined OS_FREEBSD
#include <machine/endian.h>
#elif defined OS_MACOSX
#include <machine/endian.h>
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif
// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif
#else
// No __BYTE_ORDER available: use the __LITTLE_ENDIAN__ / __BIG_ENDIAN__
// macros if the compiler defines one of them. Otherwise neither
// IS_LITTLE_ENDIAN nor IS_BIG_ENDIAN is defined here.
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif
#endif // __BYTE_ORDER
// Enable little-endian optimization for x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
// Branch-prediction hints: PREDICT_FALSE(x) / PREDICT_TRUE(x) evaluate to the
// condition x and, when COMPILER_GCC3 is defined, also tell the compiler which
// outcome is expected.
// Every expansion is fully parenthesized so that the macro behaves as a single
// operand, e.g. !PREDICT_TRUE(a || b) negates the whole condition.
#if defined(COMPILER_GCC3)
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
// Portable handling of unaligned loads, stores, and copies.
// BROTLI_UNALIGNED_{LOAD,STORE}{32,64} read or write a 32/64-bit integer at an
// address that need not be aligned. Depending on the target they are either
// direct pointer accesses or memcpy-based helpers; on some platforms, like
// ARM, the copy functions can be more efficient than a load and a store.
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
(*reinterpret_cast<uint64_t *>(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
!defined(__ARM_ARCH_5T__) && \
!defined(__ARM_ARCH_5TE__) && \
!defined(__ARM_ARCH_5TEJ__) && \
!defined(__ARM_ARCH_6__) && \
!defined(__ARM_ARCH_6J__) && \
!defined(__ARM_ARCH_6K__) && \
!defined(__ARM_ARCH_6Z__) && \
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
// 64-bit accesses go through memcpy on this target.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}
#else
// Generic fallback for architectures that don't support unaligned loads and
// stores: every access goes through memcpy.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof(v));
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}
#endif
#endif // BROTLI_ENC_PORT_H_

View File

@ -0,0 +1,86 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for encoding integers into prefix codes, the number of extra
// bits, and the actual values of the extra bits.
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include <stdint.h>
#include "./fast_log.h"
namespace brotli {
// Sizes of the prefix-code alphabets.
// NOTE(review): the insert- and copy-length counts are presumably the number
// of insert-length and copy-length prefix codes; confirm against their users.
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
// Alphabet size of the command prefix histogram.
static const int kNumCommandPrefixes = 704;
// Number of entries in kBlockLengthPrefixCode.
static const int kNumBlockLenPrefixes = 26;
// Number of distance codes that precede the direct and bucketed codes in
// PrefixEncodeCopyDistance.
static const int kNumDistanceShortCodes = 16;
// Alphabet size of the distance prefix histogram.
static const int kNumDistancePrefixes = 520;
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
// Smallest value covered by the code.
int offset;
// Number of extra bits that select a value within the range.
int nbits;
};
// Block length prefix codes, ordered by increasing offset. The ranges are
// contiguous: each entry starts where the previous one ends, beginning at
// length 1. The last entry has 24 extra bits and covers lengths from 16625.
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
{8433, 13}, {16625, 24}
};
inline void GetBlockLengthPrefixCode(int len,
int* code, int* n_extra, int* extra) {
*code = 0;
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
++(*code);
}
*n_extra = kBlockLengthPrefixCode[*code].nbits;
*extra = len - kBlockLengthPrefixCode[*code].offset;
}
// Encodes |distance_code| (1-based) as a distance prefix code plus extra bits.
// Codes below kNumDistanceShortCodes + num_direct_codes (after subtracting 1)
// are emitted as-is with *extra_bits == 0. Larger codes are bucketed by their
// highest set bit; for those, *extra_bits holds the number of extra bits in
// its top 8 bits (shifted left by 24) and the extra-bits value in the lower
// bits.
inline void PrefixEncodeCopyDistance(int distance_code,
                                     int num_direct_codes,
                                     int postfix_bits,
                                     uint16_t* code,
                                     uint32_t* extra_bits) {
  const int num_short_and_direct = kNumDistanceShortCodes + num_direct_codes;
  int dist = distance_code - 1;
  if (dist < num_short_and_direct) {
    // Short or direct code: no extra bits.
    *code = dist;
    *extra_bits = 0;
    return;
  }
  dist -= num_short_and_direct;
  dist += (1 << (postfix_bits + 2));
  const int bucket = Log2Floor(dist) - 1;
  // The lowest postfix_bits bits go into the code itself.
  const int postfix = dist & ((1 << postfix_bits) - 1);
  // Bit just below the highest set bit.
  const int prefix = (dist >> bucket) & 1;
  const int offset = (2 + prefix) << bucket;
  const int nbits = bucket - postfix_bits;
  *code = num_short_and_direct +
      ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix;
  *extra_bits = (nbits << 24) | ((dist - offset) >> postfix_bits);
}
} // namespace brotli
#endif // BROTLI_ENC_PREFIX_H_

View File

@ -0,0 +1,108 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Sliding window over the input data.
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
#include <stddef.h>
#include <stdint.h>
#include "./port.h"
namespace brotli {
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to
// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
// contains another copy of the first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
//
// The buffer is owned by the object and released in the destructor, so the
// class is not copyable.
class RingBuffer {
 public:
  // Allocates (1 << window_bits) + (1 << tail_bits) bytes plus three zeroed
  // slack bytes at the very end.
  RingBuffer(int window_bits, int tail_bits)
      : window_bits_(window_bits),
        mask_((1 << window_bits) - 1),
        tail_size_(1 << tail_bits),
        pos_(0) {
    static const int kSlackForFourByteHashingEverywhere = 3;
    const int buflen = (1 << window_bits_) + tail_size_;
    buffer_ = new uint8_t[buflen + kSlackForFourByteHashingEverywhere];
    for (int i = 0; i < kSlackForFourByteHashingEverywhere; ++i) {
      buffer_[buflen + i] = 0;
    }
  }
  ~RingBuffer() {
    delete [] buffer_;
  }
  // Copying would leave two objects deleting the same buffer.
  RingBuffer(const RingBuffer&) = delete;
  RingBuffer& operator=(const RingBuffer&) = delete;

  // Push bytes into the ring buffer.
  void Write(const uint8_t *bytes, size_t n) {
    const size_t masked_pos = pos_ & mask_;
    // The length of the writes is limited so that we do not need to worry
    // about a write wrapping around the buffer more than once.
    // NOTE(review): the original sentence was cut off after "about a write";
    // the ending above is inferred from the two-part copy below -- confirm.
    WriteTail(bytes, n);
    if (PREDICT_TRUE(masked_pos + n <= (1 << window_bits_))) {
      // A single write fits.
      memcpy(&buffer_[masked_pos], bytes, n);
    } else {
      // Split into two writes.
      // Copy into the end of the buffer, including the tail buffer.
      memcpy(&buffer_[masked_pos], bytes,
             std::min(n, ((1 << window_bits_) + tail_size_) - masked_pos));
      // Copy into the beginning of the buffer.
      memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos),
             n - ((1 << window_bits_) - masked_pos));
    }
    pos_ += n;
  }
  // Rewinds the logical position to 0; the buffer contents are not cleared.
  void Reset() {
    pos_ = 0;
  }
  // Logical cursor position in the ring buffer.
  size_t position() const { return pos_; }
  // Bit mask for getting the physical position for a logical position.
  size_t mask() const { return mask_; }
  // Pointer to the first byte of the underlying array.
  uint8_t *start() { return &buffer_[0]; }
  const uint8_t *start() const { return &buffer_[0]; }

 private:
  // Mirrors the part of a write that lands in the first tail_size_ bytes of
  // the window into the tail copy behind the window.
  void WriteTail(const uint8_t *bytes, size_t n) {
    const size_t masked_pos = pos_ & mask_;
    if (PREDICT_FALSE(masked_pos < tail_size_)) {
      // Just fill the tail buffer with the beginning data.
      const size_t p = (1 << window_bits_) + masked_pos;
      memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
    }
  }
  // Size of the ringbuffer is (1 << window_bits) + tail_size_.
  const int window_bits_;
  const size_t mask_;
  const size_t tail_size_;
  // Position to write in the ring buffer.
  size_t pos_;
  // The actual ring buffer containing the data and the copy of the beginning
  // as a tail.
  uint8_t *buffer_;
};
} // namespace brotli
#endif // BROTLI_ENC_RINGBUFFER_H_

View File

@ -0,0 +1,87 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Class to model the static dictionary.
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
#include <algorithm>
#include <unordered_map>
#include <string>
#include "./dictionary.h"
#include "./transform.h"
namespace brotli {
// Lookup tables over the (optionally transformed) static dictionary words.
//
// map_ maps a word to `(dist << 6) + len'; prefix_map_ maps the first four
// bytes of a word, packed least-significant byte first, to the length of the
// longest inserted word starting with those bytes.
class StaticDictionary {
 public:
  StaticDictionary() {}

  // Inserts every dictionary word of every length, under the identity
  // transform only or under all kNumTransforms transforms. Transformed words
  // shorter than four bytes are skipped.
  void Fill(bool enable_transforms) {
    const int num_transforms = enable_transforms ? kNumTransforms : 1;
    for (int t = num_transforms - 1; t >= 0; --t) {
      for (int i = kMaxDictionaryWordLength;
           i >= kMinDictionaryWordLength; --i) {
        const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
        for (int j = num_words - 1; j >= 0; --j) {
          int word_id = t * num_words + j;
          std::string word = GetTransformedDictionaryWord(i, word_id);
          if (word.size() >= 4) {
            Insert(word, i, word_id);
          }
        }
      }
    }
  }

  // Registers str with the given length code and distance. When str is
  // already present, the entry with the smaller packed value wins.
  void Insert(const std::string &str, int len, int dist) {
    int ix = (dist << 6) + len;
    std::unordered_map<std::string, int>::const_iterator it = map_.find(str);
    if (it != map_.end() && ix >= it->second) {
      return;
    }
    map_[str] = ix;
    uint32_t v = 0;
    for (size_t i = 0; i < 4 && i < str.size(); ++i) {
      // Go through uint8_t first: where char is signed, a byte >= 0x80 would
      // otherwise sign-extend and spill ones into the higher bytes of the key.
      v += static_cast<uint32_t>(static_cast<uint8_t>(str[i])) << (8 * i);
    }
    int& longest = prefix_map_[v];
    const int str_len = static_cast<int>(str.size());
    if (longest < str_len) {
      longest = str_len;
    }
  }

  // Returns the length of the longest inserted word whose four-byte prefix
  // key equals v, or 0 when no such word exists.
  int GetLength(uint32_t v) const {
    std::unordered_map<uint32_t, int>::const_iterator it = prefix_map_.find(v);
    if (it == prefix_map_.end()) {
      return 0;
    }
    return it->second;
  }

  // Looks up str; on success stores the length code in *len and the distance
  // in *dist and returns true. Leaves both untouched and returns false when
  // str is not in the dictionary.
  bool Get(const std::string &str, int *len, int *dist) const {
    std::unordered_map<std::string, int>::const_iterator it = map_.find(str);
    if (it == map_.end()) {
      return false;
    }
    int v = it->second;
    *len = v & 63;
    *dist = v >> 6;
    return true;
  }

 private:
  std::unordered_map<std::string, int> map_;
  std::unordered_map<uint32_t, int> prefix_map_;
};
} // namespace brotli
#endif // BROTLI_ENC_STATIC_DICT_H_

View File

@ -0,0 +1,126 @@
// Copyright 2009 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Convenience routines to make Brotli I/O classes from some memory containers and
// files.
#include "./streams.h"
#include <assert.h>
#include <string.h>
namespace brotli {
// Starts writing at the beginning of the caller-owned buffer buf, which can
// hold len bytes.
BrotliMemOut::BrotliMemOut(void* buf, int len)
    : buf_(buf),
      len_(len),
      pos_(0) {}

// Re-targets the writer at a new buffer and rewinds the write position.
void BrotliMemOut::Reset(void* buf, int len) {
  buf_ = buf;
  len_ = len;
  pos_ = 0;
}

// Brotli output routine: copy n bytes to the output buffer.
// Returns false, without writing anything, when fewer than n bytes are left.
bool BrotliMemOut::Write(const void *buf, size_t n) {
  // Compare n against the remaining space rather than forming `n + pos_',
  // which wraps around for very large n and would let the copy through.
  const size_t capacity = len_;
  const size_t used = pos_;
  if (used > capacity || n > capacity - used)
    return false;
  char* p = reinterpret_cast<char*>(buf_) + pos_;
  memcpy(p, buf, n);
  pos_ += n;
  return true;
}
// Appends output to *buf, which must be empty, letting it grow to at most
// max_size bytes.
BrotliStringOut::BrotliStringOut(std::string* buf, int max_size)
    : buf_(buf),
      max_size_(max_size) {
  assert(buf->empty());
}

// Re-targets the writer at another string with a new size limit.
void BrotliStringOut::Reset(std::string* buf, int max_size) {
  buf_ = buf;
  max_size_ = max_size;
}

// Brotli output routine: add n bytes to a string.
// Returns false, without appending, when that would exceed the size limit.
bool BrotliStringOut::Write(const void *buf, size_t n) {
  // Compare n against the remaining room rather than forming `size() + n',
  // which wraps around for very large n and would bypass the limit.
  const size_t limit = max_size_;
  const size_t used = buf_->size();
  if (used > limit || n > limit - used)
    return false;
  buf_->append(static_cast<const char*>(buf), n);
  return true;
}
// Starts reading at the beginning of the caller-owned buffer buf of len bytes.
BrotliMemIn::BrotliMemIn(const void* buf, int len)
    : buf_(buf),
      len_(len),
      pos_(0) {}

// Re-targets the reader at a new buffer and rewinds the read position.
void BrotliMemIn::Reset(const void* buf, int len) {
  buf_ = buf;
  len_ = len;
  pos_ = 0;
}

// Brotli input routine: read the next chunk of memory.
// Returns a pointer into the input buffer and stores the chunk length (at
// most n) in *output. At end of input returns NULL and stores 0.
const void* BrotliMemIn::Read(size_t n, size_t* output) {
  if (pos_ == len_) {
    // Give the out-parameter a defined value on the end-of-data path too.
    *output = 0;
    return NULL;
  }
  if (n > len_ - pos_)
    n = len_ - pos_;
  const char* p = reinterpret_cast<const char*>(buf_) + pos_;
  pos_ += n;
  *output = n;
  return p;
}
// Reads from f through an internally allocated buffer of max_read_size bytes.
// If the allocation fails, buf_ stays NULL and every Read reports end of data.
BrotliFileIn::BrotliFileIn(FILE* f, size_t max_read_size)
    : f_(f),
      buf_(malloc(max_read_size)),
      buf_size_(max_read_size) {}

// Releases the read buffer; the FILE itself is not closed here.
BrotliFileIn::~BrotliFileIn() {
  if (buf_) free(buf_);
}

// Reads up to n bytes (capped at the buffer size) from the file into the
// internal buffer. Returns the buffer and stores the byte count in
// *bytes_read, or returns NULL when nothing could be read.
const void* BrotliFileIn::Read(size_t n, size_t* bytes_read) {
  // Every path below leaves *bytes_read with a defined value, including the
  // n == 0 probe, which returns without touching the file.
  *bytes_read = 0;
  if (buf_ == NULL) {
    return NULL;
  }
  if (n > buf_size_) {
    n = buf_size_;
  } else if (n == 0) {
    return feof(f_) ? NULL : buf_;
  }
  *bytes_read = fread(buf_, 1, n, f_);
  if (*bytes_read == 0) {
    return NULL;
  } else {
    return buf_;
  }
}
// Writes to f; the FILE stays owned by the caller.
BrotliFileOut::BrotliFileOut(FILE* f) : f_(f) {}

// Writes n bytes from buf to the file. Returns true only if all of them were
// written.
bool BrotliFileOut::Write(const void* buf, size_t n) {
  // Count bytes rather than one n-sized item: fwrite(buf, n, 1, f) returns 0
  // for n == 0, which made an empty write look like a failure.
  return fwrite(buf, 1, n, f_) == n;
}
} // namespace brotli

View File

@ -0,0 +1,130 @@
// Copyright 2009 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Input and output classes for streaming brotli compression.
#ifndef BROTLI_ENC_STREAMS_H_
#define BROTLI_ENC_STREAMS_H_
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string>
namespace brotli {
// Abstract source of input bytes for the compression routines.
class BrotliIn {
 public:
  virtual ~BrotliIn() = default;

  // Hands out the next block of input, at most n bytes long, and reports the
  // block's actual length through *nread.
  // Once the data is exhausted the result is NULL. While data remains the
  // result must not be NULL, even when called with n == 0.
  // Read will only be called if some of its bytes are needed.
  virtual const void* Read(size_t n, size_t* nread) = 0;
};
// Abstract sink for the bytes produced by the compression routines.
class BrotliOut {
 public:
  virtual ~BrotliOut() = default;

  // Consumes n bytes starting at buf.
  // The result is true if all of them were written and false otherwise.
  virtual bool Write(const void *buf, size_t n) = 0;
};
// Adapter class to make BrotliIn objects from raw memory.
// The buffer is only referenced through a pointer, never copied, so it has to
// outlive the reader.
class BrotliMemIn : public BrotliIn {
public:
// Reads from the len bytes starting at buf.
BrotliMemIn(const void* buf, int len);
// Points the reader at a different buffer and restarts from its beginning.
void Reset(const void* buf, int len);
// returns the amount of data consumed
int position() const { return pos_; }
// Implements BrotliIn::Read. OUTPUT is the out-parameter that the interface
// calls nread: it receives the length of the returned chunk.
const void* Read(size_t n, size_t* OUTPUT) override;
private:
const void* buf_; // start of input buffer
int len_; // length of input
int pos_; // current read position within input
};
// Adapter class to make BrotliOut objects from raw memory.
// The buffer is only referenced through a pointer, so it has to outlive the
// writer.
class BrotliMemOut : public BrotliOut {
public:
// Writes into the len bytes starting at buf.
BrotliMemOut(void* buf, int len);
// Points the writer at a different buffer and restarts from its beginning.
void Reset(void* buf, int len);
// returns the amount of data written
int position() const { return pos_; }
// Implements BrotliOut::Write.
bool Write(const void* buf, size_t n) override;
private:
void* buf_; // start of output buffer
int len_; // length of output
int pos_; // current write position within output
};
// Adapter class to make BrotliOut objects from a string.
// The string is only referenced through a pointer, so it has to outlive the
// writer.
class BrotliStringOut : public BrotliOut {
public:
// Create a writer that appends its data to buf.
// buf->size() will grow to at most max_size
// buf is expected to be empty when constructing BrotliStringOut.
BrotliStringOut(std::string* buf, int max_size);
// Points the writer at a different string with a new size limit.
void Reset(std::string* buf, int max_len);
// Implements BrotliOut::Write.
bool Write(const void* buf, size_t n) override;
private:
std::string* buf_; // string that receives the output
int max_size_; // max length of output
};
// Adapter class to make BrotliIn object from a file.
class BrotliFileIn : public BrotliIn {
 public:
  // Reads from f in chunks of at most max_read_size bytes.
  BrotliFileIn(FILE* f, size_t max_read_size);
  ~BrotliFileIn();

  // The read buffer is owned through a raw pointer that the destructor frees,
  // so a member-wise copy would free it twice. Copying is therefore disabled.
  BrotliFileIn(const BrotliFileIn&) = delete;
  BrotliFileIn& operator=(const BrotliFileIn&) = delete;

  // Implements BrotliIn::Read.
  const void* Read(size_t n, size_t* bytes_read) override;

 private:
  FILE* f_;          // file being read
  void* buf_;        // read buffer handed back to callers of Read
  size_t buf_size_;  // capacity of buf_ in bytes
};
// Adapter class to make BrotliOut object from a file.
class BrotliFileOut : public BrotliOut {
public:
// Writes to f.
explicit BrotliFileOut(FILE* f);
// Implements BrotliOut::Write.
bool Write(const void* buf, size_t n) override;
private:
FILE* f_; // destination file
};
} // namespace brotli
#endif // BROTLI_ENC_STREAMS_H_

Binary file not shown.

View File

@ -0,0 +1,242 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Transformations on dictionary words.
#ifndef BROTLI_ENC_TRANSFORM_H_
#define BROTLI_ENC_TRANSFORM_H_
#include <string>
#include "./dictionary.h"
namespace brotli {
// Word-level operation that ApplyTransform performs on a dictionary word
// before the prefix and suffix are attached.
// The numeric values matter: ApplyTransform does arithmetic on them.
enum WordTransformType {
// The word is copied unchanged.
kIdentity = 0,
// kOmitLastN: the enumerator's value (N) is subtracted from the word length,
// dropping the last N bytes.
kOmitLast1 = 1,
kOmitLast2 = 2,
kOmitLast3 = 3,
kOmitLast4 = 4,
kOmitLast5 = 5,
kOmitLast6 = 6,
kOmitLast7 = 7,
kOmitLast8 = 8,
kOmitLast9 = 9,
// ToUpperCase is applied once, at the start of the word.
kUppercaseFirst = 10,
// ToUpperCase is applied repeatedly until the whole word has been covered.
kUppercaseAll = 11,
// kOmitFirstN: the first N bytes are skipped, where
// N = value - (kOmitFirst1 - 1).
kOmitFirst1 = 12,
kOmitFirst2 = 13,
kOmitFirst3 = 14,
kOmitFirst4 = 15,
kOmitFirst5 = 16,
kOmitFirst6 = 17,
kOmitFirst7 = 18,
kOmitFirst8 = 19,
kOmitFirst9 = 20,
};
// One dictionary transform: ApplyTransform emits prefix, then the word
// modified according to word_transform, then suffix.
struct Transform {
// NUL-terminated string emitted before the word.
const char* prefix;
// Operation applied to the word itself.
WordTransformType word_transform;
// NUL-terminated string emitted after the word.
const char* suffix;
};
// Table of all transforms, as { prefix, word transform, suffix } triples.
// The position of an entry is its transform index: GetTransformedDictionaryWord
// selects kTransforms[word_id / num_words], so entries must not be reordered.
static const Transform kTransforms[] = {
{ "", kIdentity, "" },
{ "", kIdentity, " " },
{ " ", kIdentity, " " },
{ "", kOmitFirst1, "" },
{ "", kUppercaseFirst, " " },
{ "", kIdentity, " the " },
{ " ", kIdentity, "" },
{ "s ", kIdentity, " " },
{ "", kIdentity, " of " },
{ "", kUppercaseFirst, "" },
{ "", kIdentity, " and " },
{ "", kOmitFirst2, "" },
{ "", kOmitLast1, "" },
{ ", ", kIdentity, " " },
{ "", kIdentity, ", " },
{ " ", kUppercaseFirst, " " },
{ "", kIdentity, " in " },
{ "", kIdentity, " to " },
{ "e ", kIdentity, " " },
{ "", kIdentity, "\"" },
{ "", kIdentity, "." },
{ "", kIdentity, "\">" },
{ "", kIdentity, "\n" },
{ "", kOmitLast3, "" },
{ "", kIdentity, "]" },
{ "", kIdentity, " for " },
{ "", kOmitFirst3, "" },
{ "", kOmitLast2, "" },
{ "", kIdentity, " a " },
{ "", kIdentity, " that " },
{ " ", kUppercaseFirst, "" },
{ "", kIdentity, ". " },
{ ".", kIdentity, "" },
{ " ", kIdentity, ", " },
{ "", kOmitFirst4, "" },
{ "", kIdentity, " with " },
{ "", kIdentity, "'" },
{ "", kIdentity, " from " },
{ "", kIdentity, " by " },
{ "", kOmitFirst5, "" },
{ "", kOmitFirst6, "" },
{ " the ", kIdentity, "" },
{ "", kOmitLast4, "" },
{ "", kIdentity, ". The " },
{ "", kUppercaseAll, "" },
{ "", kIdentity, " on " },
{ "", kIdentity, " as " },
{ "", kIdentity, " is " },
{ "", kOmitLast7, "" },
{ "", kOmitLast1, "ing " },
{ "", kIdentity, "\n\t" },
{ "", kIdentity, ":" },
{ " ", kIdentity, ". " },
{ "", kIdentity, "ed " },
{ "", kOmitFirst9, "" },
{ "", kOmitFirst7, "" },
{ "", kOmitLast6, "" },
{ "", kIdentity, "(" },
{ "", kUppercaseFirst, ", " },
{ "", kOmitLast8, "" },
{ "", kIdentity, " at " },
{ "", kIdentity, "ly " },
{ " the ", kIdentity, " of " },
{ "", kOmitLast5, "" },
{ "", kOmitLast9, "" },
{ " ", kUppercaseFirst, ", " },
{ "", kUppercaseFirst, "\"" },
{ ".", kIdentity, "(" },
{ "", kUppercaseAll, " " },
{ "", kUppercaseFirst, "\">" },
{ "", kIdentity, "=\"" },
{ " ", kIdentity, "." },
{ ".com/", kIdentity, "" },
{ " the ", kIdentity, " of the " },
{ "", kUppercaseFirst, "'" },
{ "", kIdentity, ". This " },
{ "", kIdentity, "," },
{ ".", kIdentity, " " },
{ "", kUppercaseFirst, "(" },
{ "", kUppercaseFirst, "." },
{ "", kIdentity, " not " },
{ " ", kIdentity, "=\"" },
{ "", kIdentity, "er " },
{ " ", kUppercaseAll, " " },
{ "", kIdentity, "al " },
{ " ", kUppercaseAll, "" },
{ "", kIdentity, "='" },
{ "", kUppercaseAll, "\"" },
{ "", kUppercaseFirst, ". " },
{ " ", kIdentity, "(" },
{ "", kIdentity, "ful " },
{ " ", kUppercaseFirst, ". " },
{ "", kIdentity, "ive " },
{ "", kIdentity, "less " },
{ "", kUppercaseAll, "'" },
{ "", kIdentity, "est " },
{ " ", kUppercaseFirst, "." },
{ "", kUppercaseAll, "\">" },
{ " ", kIdentity, "='" },
{ "", kUppercaseFirst, "," },
{ "", kIdentity, "ize " },
{ "", kUppercaseAll, "." },
{ "\xc2\xa0", kIdentity, "" },
{ " ", kIdentity, "," },
{ "", kUppercaseFirst, "=\"" },
{ "", kUppercaseAll, "=\"" },
{ "", kIdentity, "ous " },
{ "", kUppercaseAll, ", " },
{ "", kUppercaseFirst, "='" },
{ " ", kUppercaseFirst, "," },
{ " ", kUppercaseAll, "=\"" },
{ " ", kUppercaseAll, ", " },
{ "", kUppercaseAll, "," },
{ "", kUppercaseAll, "(" },
{ "", kUppercaseAll, ". " },
{ " ", kUppercaseAll, "." },
{ "", kUppercaseAll, "='" },
{ " ", kUppercaseAll, ". " },
{ " ", kUppercaseFirst, "=\"" },
{ " ", kUppercaseAll, "='" },
{ " ", kUppercaseFirst, "='" },
};
// Number of entries in kTransforms, derived from the array itself.
static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
// Uppercases the character that starts at p, of which len bytes are
// available, and returns the number of bytes that character occupied.
//
// A lead byte below 0xc0 (or any byte when only one is left) is treated as a
// one-byte character and flipped only if it is an ASCII lowercase letter.
// A lead byte below 0xe0 starts a two-byte character whose second byte gets
// bit 5 toggled. Anything else is a three-byte character whose third byte is
// XOR-ed with 5, unless only two bytes remain, in which case nothing changes.
static int ToUpperCase(uint8_t *p, int len) {
  const uint8_t lead = p[0];

  const bool single_byte = (len == 1) || (lead < 0xc0);
  if (single_byte) {
    const bool ascii_lowercase = ('a' <= lead) && (lead <= 'z');
    if (ascii_lowercase) {
      p[0] = static_cast<uint8_t>(lead ^ 32);
    }
    return 1;
  }

  if (lead < 0xe0) {
    p[1] ^= 32;
    return 2;
  }

  if (len != 2) {
    p[2] ^= 5;
    return 3;
  }
  return 2;
}
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, int len) {
std::string ret(t.prefix);
if (t.word_transform <= kOmitLast9) {
len -= t.word_transform;
}
if (len > 0) {
if (t.word_transform >= kOmitFirst1) {
const int skip = t.word_transform - (kOmitFirst1 - 1);
if (len > skip) {
ret += std::string(word + skip, word + len);
}
} else {
ret += std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[ret.size() - len]);
if (t.word_transform == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (t.word_transform == kUppercaseAll) {
while (len > 0) {
int step = ToUpperCase(uppercase, len);
uppercase += step;
len -= step;
}
}
}
}
ret += std::string(t.suffix);
return ret;
}
// Returns the dictionary word selected by len_code and word_id after its
// transform has been applied. word_id packs two indices: the quotient by the
// number of words of this length picks the transform, the remainder picks the
// word.
inline std::string GetTransformedDictionaryWord(int len_code, int word_id) {
  const int words_of_this_length =
      1 << kBrotliDictionarySizeBitsByLength[len_code];
  const int transform_idx = word_id / words_of_this_length;
  const int word_idx = word_id % words_of_this_length;

  // Words of one length are stored back to back, len_code bytes each.
  int word_offset = kBrotliDictionaryOffsetsByLength[len_code];
  word_offset += len_code * word_idx;

  return ApplyTransform(kTransforms[transform_idx],
                        &kBrotliDictionary[word_offset], len_code);
}
} // namespace brotli
#endif // BROTLI_ENC_TRANSFORM_H_

View File

@ -0,0 +1,91 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Write bits into a byte array.
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include "./port.h"
namespace brotli {
//#define BIT_WRITER_DEBUG
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0 BYTE+1 BYTE+2
//
// 0000 0RRR 0000 0000 0000 0000
//
// Now, we could write 5 or fewer bits in MSB by just shifting by 3
// and OR'ing to BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
//
// Parameters:
//   n_bits  number of bits to write
//   bits    the value to write; must be smaller than 1 << n_bits (asserted)
//   pos     current bit position in array; advanced by n_bits on return
//   array   destination byte array
inline void WriteBits(int n_bits,
uint64_t bits,
int * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
#endif
assert(bits < 1ULL << n_bits);
#ifdef IS_LITTLE_ENDIAN
// This branch of the code can write up to 56 bits at a time,
// 7 bits are lost by being perhaps already in *p and at least
// 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
// bits are in *p and we write 57 bits, then the next write will
// access a byte that was never initialized).
uint8_t *p = &array[*pos >> 3];
// Start from the bits already stored in the current byte and OR the new bits
// in above them.
uint64_t v = *p;
v |= bits << (*pos & 7);
// Stores all eight bytes of v at p, not just the bytes that carry new bits.
BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
*pos += n_bits;
#else
// implicit & 0xff is assumed for uint8_t arithmetics
uint8_t *array_pos = &array[*pos >> 3];
const int bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
// The first byte may already hold bits, so it is OR-ed; later bytes are
// overwritten.
*array_pos++ |= bits;
for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
bits_left_to_write >= 1;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = bits;
}
// Clear the byte after the last one written so that the next call can OR
// into it.
*array_pos = 0;
*pos += n_bits;
#endif
}
// Zeroes the byte of array that contains bit position pos.
// pos has to be a multiple of 8 (asserted), i.e. sit on a byte boundary.
inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
  printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
  assert(!(pos & 7));
  const int byte_index = pos >> 3;
  array[byte_index] = 0;
}
} // namespace brotli
#endif // BROTLI_ENC_WRITE_BITS_H_

View File

@ -0,0 +1,51 @@
// Emscripten wrapper
#include <emscripten/bind.h>
#include <stdlib.h>
#include "./woff2/woff2_enc.h"
using namespace emscripten;
using std::string;
// Allocates one zero-initialised int and returns its address as an int.
// The slot is passed back to convert(), which stores the output size in it,
// and is released by freePtrs().
int getSizePtr() {
  void* slot = calloc(1, sizeof(int));
  return reinterpret_cast<int>(static_cast<int*>(slot));
}
// Converts the TTF font stored in the inputLength bytes at inputDataAddress
// to WOFF2.
//
// On success returns the address of a newly allocated buffer holding the
// WOFF2 data and stores its size in the int at outputSizePtrAddress. The
// buffer is released with freePtrs().
// On failure (negative length, allocation failure, or a font that cannot be
// converted) returns 0 and stores 0 as the size, so the caller never reads
// from a buffer that holds no valid font.
int convert(int inputDataAddress, int inputLength, int outputSizePtrAddress) {
  int* outputSizePtr = reinterpret_cast<int*>(outputSizePtrAddress);
  const uint8_t* inputData = reinterpret_cast<const uint8_t*>(inputDataAddress);

  // Report "no output" until the conversion has actually succeeded.
  *outputSizePtr = 0;
  if (inputLength < 0) {
    return 0;
  }

  // Upper bound for the compressed size; ConvertTTFToWOFF2 replaces it with
  // the actual size.
  size_t outputSize = woff2::MaxWOFF2CompressedSize(inputData, inputLength);
  uint8_t* outputData =
      reinterpret_cast<uint8_t*>(calloc(outputSize, sizeof(uint8_t)));
  if (outputData == NULL) {
    return 0;
  }

  if (!woff2::ConvertTTFToWOFF2(inputData, inputLength,
                                outputData, &outputSize)) {
    free(outputData);
    return 0;
  }

  *outputSizePtr = outputSize;
  return reinterpret_cast<int>(outputData);
}
// Releases the output buffer returned by convert() and the size slot returned
// by getSizePtr(). Both are given as integer addresses.
void freePtrs(int outputDataAddress, int sizePtrAddress) {
  free(reinterpret_cast<void*>(outputDataAddress));
  free(reinterpret_cast<void*>(sizePtrAddress));
}
// Registers the three wrapper functions under the names "getSizePtr",
// "convert" and "freePtrs". All of them exchange addresses as plain ints.
EMSCRIPTEN_BINDINGS(ttf2woff2_fallback) {
function("getSizePtr", &getSizePtr, allow_raw_pointers());
function("convert", &convert, allow_raw_pointers());
function("freePtrs", &freePtrs, allow_raw_pointers());
}

View File

@ -0,0 +1,172 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The parts of ots.h & opentype-sanitiser.h that we need, taken from the
// https://code.google.com/p/ots/ project.
#ifndef WOFF2_BUFFER_H_
#define WOFF2_BUFFER_H_
#if defined(_WIN32)
#include <stdlib.h>
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned int uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#define ntohl(x) _byteswap_ulong (x)
#define ntohs(x) _byteswap_ushort (x)
#define htonl(x) _byteswap_ulong (x)
#define htons(x) _byteswap_ushort (x)
#else
#include <arpa/inet.h>
#include <stdint.h>
#endif
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
namespace woff2 {
#if defined(_MSC_VER) || !defined(FONT_COMPRESSION_DEBUG)
#define FONT_COMPRESSION_FAILURE() false
#else
#define FONT_COMPRESSION_FAILURE() \
woff2::Failure(__FILE__, __LINE__, __PRETTY_FUNCTION__)
// Debug helper behind FONT_COMPRESSION_FAILURE(): reports the file f, line l
// and function fn where a failure was raised on stderr, flushes the stream
// and always yields false.
inline bool Failure(const char *f, int l, const char *fn) {
  std::fprintf(stderr, "ERROR at %s:%d (%s)\n", f, l, fn);
  std::fflush(stderr);
  return false;
}
#endif
// -----------------------------------------------------------------------------
// Buffer helper class
//
// This class performs some trivial buffer operations while checking for
// out-of-bounds errors. As a family they return false if anything is amiss,
// updating the current offset otherwise.
// -----------------------------------------------------------------------------
// Bounds-checked reader over a caller-owned byte array. Every Read* method
// returns false and leaves the offset unchanged when the requested bytes are
// not available; otherwise it advances the offset past what was read.
class Buffer {
 public:
  // Wraps the len bytes at buffer. The data is not copied.
  Buffer(const uint8_t *buffer, size_t len)
      : buffer_(buffer),
        length_(len),
        offset_(0) { }

  // Advances the offset by n_bytes without copying anything.
  bool Skip(size_t n_bytes) {
    return Read(NULL, n_bytes);
  }

  // Copies n_bytes to buffer (when buffer is not NULL) and advances.
  // Requests larger than 1 GiB are rejected outright.
  bool Read(uint8_t *buffer, size_t n_bytes) {
    if (n_bytes > 1024 * 1024 * 1024) {
      return FONT_COMPRESSION_FAILURE();
    }
    if (!HasBytes(n_bytes)) {
      return FONT_COMPRESSION_FAILURE();
    }
    if (buffer) {
      std::memcpy(buffer, buffer_ + offset_, n_bytes);
    }
    offset_ += n_bytes;
    return true;
  }

  // Reads one byte.
  bool ReadU8(uint8_t *value) {
    if (!HasBytes(1)) {
      return FONT_COMPRESSION_FAILURE();
    }
    *value = buffer_[offset_];
    ++offset_;
    return true;
  }

  // Reads a 16-bit value stored in network byte order.
  bool ReadU16(uint16_t *value) {
    if (!HasBytes(2)) {
      return FONT_COMPRESSION_FAILURE();
    }
    std::memcpy(value, buffer_ + offset_, sizeof(uint16_t));
    *value = ntohs(*value);
    offset_ += 2;
    return true;
  }

  bool ReadS16(int16_t *value) {
    return ReadU16(reinterpret_cast<uint16_t*>(value));
  }

  // Reads a 24-bit value, most significant byte first.
  bool ReadU24(uint32_t *value) {
    if (!HasBytes(3)) {
      return FONT_COMPRESSION_FAILURE();
    }
    *value = static_cast<uint32_t>(buffer_[offset_]) << 16 |
        static_cast<uint32_t>(buffer_[offset_ + 1]) << 8 |
        static_cast<uint32_t>(buffer_[offset_ + 2]);
    offset_ += 3;
    return true;
  }

  // Reads a 32-bit value stored in network byte order.
  bool ReadU32(uint32_t *value) {
    if (!HasBytes(4)) {
      return FONT_COMPRESSION_FAILURE();
    }
    std::memcpy(value, buffer_ + offset_, sizeof(uint32_t));
    *value = ntohl(*value);
    offset_ += 4;
    return true;
  }

  bool ReadS32(int32_t *value) {
    return ReadU32(reinterpret_cast<uint32_t*>(value));
  }

  // Reads four bytes verbatim, without any byte-order conversion.
  bool ReadTag(uint32_t *value) {
    if (!HasBytes(4)) {
      return FONT_COMPRESSION_FAILURE();
    }
    std::memcpy(value, buffer_ + offset_, sizeof(uint32_t));
    offset_ += 4;
    return true;
  }

  // Reads eight bytes verbatim, without any byte-order conversion.
  bool ReadR64(uint64_t *value) {
    if (!HasBytes(8)) {
      return FONT_COMPRESSION_FAILURE();
    }
    std::memcpy(value, buffer_ + offset_, sizeof(uint64_t));
    offset_ += 8;
    return true;
  }

  const uint8_t *buffer() const { return buffer_; }
  size_t offset() const { return offset_; }
  size_t length() const { return length_; }

  // Moves the read position. The new offset is not validated here; the next
  // Read* call rejects it if it lies outside the buffer.
  void set_offset(size_t newoffset) { offset_ = newoffset; }

 private:
  // True when n_bytes can be read at the current offset. Written without
  // forming `offset_ + n_bytes', which can wrap around when set_offset() was
  // given a value near the top of the size_t range and then pass the check.
  bool HasBytes(size_t n_bytes) const {
    return n_bytes <= length_ && offset_ <= length_ - n_bytes;
  }

  const uint8_t * const buffer_;
  const size_t length_;
  size_t offset_;
};
} // namespace woff2
#endif // WOFF2_BUFFER_H_

View File

@ -0,0 +1,400 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Font management utilities
#include "./font.h"
#include <algorithm>
#include "./buffer.h"
#include "./port.h"
#include "./store_bytes.h"
#include "./table_tags.h"
#include "./woff2_common.h"
namespace woff2 {
// Returns the table stored under tag, or a null pointer if the font has none.
Font::Table* Font::FindTable(uint32_t tag) {
  auto found = tables.find(tag);
  if (found == tables.end()) {
    return NULL;
  }
  return &found->second;
}
// Const overload: returns the table stored under tag, or a null pointer if
// the font has none.
const Font::Table* Font::FindTable(uint32_t tag) const {
  auto found = tables.find(tag);
  if (found == tables.end()) {
    return NULL;
  }
  return &found->second;
}
// Returns the tags of the tables to write, in ascending tag order. Tables
// whose tag has any of the 0x80808080 bits set are left out.
std::vector<uint32_t> Font::OutputOrderedTags() const {
  std::vector<uint32_t> ordered_tags;

  for (const auto& entry : tables) {
    const uint32_t tag = entry.second.tag;
    // A tag with one of these bits set marks a transformed table, which is
    // written together with its original version rather than on its own.
    const bool is_transformed = (tag & 0x80808080) != 0;
    if (!is_transformed) {
      ordered_tags.push_back(tag);
    }
  }

  // Alphabetize and do not put loca immediately after glyf
  // This violates woff2 spec but results in a font that passes OTS
  std::sort(ordered_tags.begin(), ordered_tags.end());

  // TODO(user): change to match spec once browsers are on newer OTS
  /*
  auto glyf_loc = std::find(ordered_tags.begin(), ordered_tags.end(),
      kGlyfTableTag);
  auto loca_loc = std::find(ordered_tags.begin(), ordered_tags.end(),
      kLocaTableTag);
  if (glyf_loc != ordered_tags.end() && loca_loc != ordered_tags.end()) {
    ordered_tags.erase(loca_loc);
    ordered_tags.insert(std::find(ordered_tags.begin(), ordered_tags.end(),
      kGlyfTableTag) + 1, kLocaTableTag);
  }*/

  return ordered_tags;
}
// Parses an sfnt table directory from file, starting at the numTables field
// (the flavor has already been consumed by the caller), and fills
// font->num_tables and font->tables. data and len describe the whole font
// file; table data pointers refer into it.
//
// Fails on truncated input, on a table that is not 4-byte aligned or does not
// fit in the file, on a duplicate tag, and on tables that overlap each other
// or the directory.
bool ReadTrueTypeFont(Buffer* file, const uint8_t* data, size_t len,
                      Font* font) {
  // We don't care about the search_range, entry_selector and range_shift
  // fields, they will always be computed upon writing the font.
  const bool header_ok = file->ReadU16(&font->num_tables) && file->Skip(6);
  if (!header_ok) {
    return FONT_COMPRESSION_FAILURE();
  }

  // Table start offset -> table length, ordered by offset.
  std::map<uint32_t, uint32_t> intervals;
  for (uint16_t i = 0; i < font->num_tables; ++i) {
    Font::Table table;
    table.reuse_of = NULL;
    const bool record_ok = file->ReadU32(&table.tag) &&
                           file->ReadU32(&table.checksum) &&
                           file->ReadU32(&table.offset) &&
                           file->ReadU32(&table.length);
    if (!record_ok) {
      return FONT_COMPRESSION_FAILURE();
    }

    const bool misaligned = (table.offset & 3) != 0;
    const bool out_of_file =
        table.length > len || len - table.length < table.offset;
    if (misaligned || out_of_file) {
      return FONT_COMPRESSION_FAILURE();
    }

    intervals[table.offset] = table.length;
    table.data = data + table.offset;

    // Each tag may appear only once.
    if (font->tables.count(table.tag) != 0) {
      return FONT_COMPRESSION_FAILURE();
    }
    font->tables[table.tag] = table;
  }

  // Check that tables are non-overlapping.
  uint32_t last_offset = 12UL + 16UL * font->num_tables;
  for (const auto& interval : intervals) {
    const uint32_t begin = interval.first;
    const uint32_t end = begin + interval.second;
    // `end < begin' means the 32-bit sum wrapped around.
    if (begin < last_offset || end < begin) {
      return FONT_COMPRESSION_FAILURE();
    }
    last_offset = end;
  }
  return true;
}
// Reads one member font of a collection at the current position of file:
// first its flavor, then its table directory. all_tables maps a table offset
// to the first table seen at that offset across the collection. A table whose
// offset is already known gets reuse_of pointed at that earlier table;
// otherwise the table is recorded in all_tables.
bool ReadCollectionFont(Buffer* file, const uint8_t* data, size_t len,
                        Font* font,
                        std::map<uint32_t, Font::Table*>* all_tables) {
  if (!file->ReadU32(&font->flavor)) {
    return FONT_COMPRESSION_FAILURE();
  }
  if (!ReadTrueTypeFont(file, data, len, font)) {
    return FONT_COMPRESSION_FAILURE();
  }

  for (auto& entry : font->tables) {
    Font::Table& table = entry.second;
    auto seen = all_tables->find(table.offset);
    if (seen != all_tables->end()) {
      table.reuse_of = seen->second;
      continue;
    }
    (*all_tables)[table.offset] = font->FindTable(table.tag);
  }
  return true;
}
// Parses a TrueType collection from file, starting right after the 'ttcf'
// tag: header version, font count, one offset per font, then each member font
// at its offset. Fills font_collection->header_version and
// font_collection->fonts. Returns false on truncated or inconsistent input.
bool ReadTrueTypeCollection(Buffer* file, const uint8_t* data, size_t len,
                            FontCollection* font_collection) {
  uint32_t num_fonts;

  if (!file->ReadU32(&font_collection->header_version) ||
      !file->ReadU32(&num_fonts)) {
    return FONT_COMPRESSION_FAILURE();
  }

  // The counter has the same unsigned type as num_fonts, so the comparison
  // is not a signed/unsigned mix. A bogus count is harmless here: the reads
  // below fail as soon as the input runs out.
  std::vector<uint32_t> offsets;
  for (uint32_t i = 0; i < num_fonts; i++) {
    uint32_t offset;
    if (!file->ReadU32(&offset)) {
      return FONT_COMPRESSION_FAILURE();
    }
    offsets.push_back(offset);
  }

  font_collection->fonts.resize(offsets.size());
  std::vector<Font>::iterator font_it = font_collection->fonts.begin();

  std::map<uint32_t, Font::Table*> all_tables;
  for (const auto offset : offsets) {
    // The offset comes straight from the file; reject one that points past
    // the end before seeking to it.
    if (offset > len) {
      return FONT_COMPRESSION_FAILURE();
    }
    file->set_offset(offset);
    Font& font = *font_it++;
    if (!ReadCollectionFont(file, data, len, &font, &all_tables)) {
      return FONT_COMPRESSION_FAILURE();
    }
  }

  return true;
}
// Parses a single (non-collection) font from the len bytes at data into font.
// Fails when the flavor cannot be read or identifies a collection.
bool ReadFont(const uint8_t* data, size_t len, Font* font) {
  Buffer file(data, len);

  const bool is_single_font =
      file.ReadU32(&font->flavor) && font->flavor != kTtcFontFlavor;
  if (!is_single_font) {
    return FONT_COMPRESSION_FAILURE();
  }
  return ReadTrueTypeFont(&file, data, len, font);
}
bool ReadFontCollection(const uint8_t* data, size_t len,
FontCollection* font_collection) {
Buffer file(data, len);
uint32_t flavor;
if (!file.ReadU32(&flavor)) {
return FONT_COMPRESSION_FAILURE();
}
if (flavor != kTtcFontFlavor) {
font_collection->fonts.resize(1);
Font& font = font_collection->fonts[0];
font.flavor = flavor;
return ReadTrueTypeFont(&file, data, len, &font);
}
return ReadTrueTypeCollection(&file, data, len, font_collection);
}
// Returns the largest end offset used by the font: at least the size of the
// header plus table directory, and for every table its offset plus its
// length rounded up to a multiple of four.
size_t FontFileSize(const Font& font) {
  size_t largest_end = 12ULL + 16ULL * font.num_tables;
  for (const auto& entry : font.tables) {
    const Font::Table& table = entry.second;
    // Bytes needed to pad the table length up to a 4-byte boundary.
    const size_t padding = (4 - (table.length & 3)) & 3;
    const size_t padded_end = (padding + table.offset) + table.length;
    if (largest_end < padded_end) {
      largest_end = padded_end;
    }
  }
  return largest_end;
}
// Returns the largest FontFileSize over all fonts of the collection, or 0 for
// an empty collection.
size_t FontCollectionFileSize(const FontCollection& font_collection) {
  size_t largest_end = 0;
  for (const auto& font : font_collection.fonts) {
    // FontFileSize reports the highest offset the font reaches.
    const size_t font_end = FontFileSize(font);
    if (largest_end < font_end) {
      largest_end = font_end;
    }
  }
  return largest_end;
}
// Serializes font to the start of dst, which holds dst_size bytes.
// Convenience overload of the offset-taking WriteFont, starting at offset 0.
bool WriteFont(const Font& font, uint8_t* dst, size_t dst_size) {
  size_t write_pos = 0;
  const bool written = WriteFont(font, &write_pos, dst, dst_size);
  return written;
}
// Writes the directory record (tag, checksum, offset, length) for table at
// *offset in dst and advances *offset past it. For a reused table the record
// of the table it reuses is written instead. Fails if the record does not fit
// into dst_size bytes.
bool WriteTableRecord(const Font::Table* table, size_t* offset, uint8_t* dst,
                      size_t dst_size) {
  if (dst_size < *offset + kSfntEntrySize) {
    return FONT_COMPRESSION_FAILURE();
  }

  const Font::Table* record = table->IsReused() ? table->reuse_of : table;
  StoreU32(record->tag, offset, dst);
  StoreU32(record->checksum, offset, dst);
  StoreU32(record->offset, offset, dst);
  StoreU32(record->length, offset, dst);
  return true;
}
// Writes the directory record of table at *offset and, unless the table
// reuses another one, copies its data to dst + table.offset followed by zero
// padding up to a 4-byte boundary. Fails when anything would fall outside the
// dst_size bytes of dst.
bool WriteTable(const Font::Table& table, size_t* offset, uint8_t* dst,
                size_t dst_size) {
  if (!WriteTableRecord(&table, offset, dst, dst_size)) {
    return false;
  }
  // Data of a reused table was already written when it was first seen.
  if (table.IsReused()) {
    return true;
  }

  // 32-bit end of the table data; `data_end < table.offset' means it wrapped.
  const uint32_t data_end = table.offset + table.length;
  if (data_end < table.offset || dst_size < data_end) {
    return FONT_COMPRESSION_FAILURE();
  }
  memcpy(dst + table.offset, table.data, table.length);

  const size_t padding_size = (4 - (table.length & 3)) & 3;
  const size_t padded_end = data_end + padding_size;
  if (padded_end < padding_size || dst_size < padded_end) {
    return FONT_COMPRESSION_FAILURE();
  }
  memset(dst + table.offset + table.length, 0, padding_size);
  return true;
}
// Serializes font into dst (dst_size bytes): the 12-byte sfnt header and the
// table directory go to *offset, which is advanced past them, and the table
// data goes to each table's own offset. Returns false if something does not
// fit.
bool WriteFont(const Font& font, size_t* offset, uint8_t* dst,
               size_t dst_size) {
  if (dst_size < 12ULL + 16ULL * font.num_tables) {
    return FONT_COMPRESSION_FAILURE();
  }
  // The check above ignores *offset, which is non-zero when this font is a
  // member of a collection. Make sure the 12 header bytes written right below
  // stay inside dst; the table records are checked one by one later on.
  if (*offset > dst_size || dst_size - *offset < 12) {
    return FONT_COMPRESSION_FAILURE();
  }
  StoreU32(font.flavor, offset, dst);
  Store16(font.num_tables, offset, dst);
  // Binary-search helper fields of the sfnt header, derived from num_tables.
  uint16_t max_pow2 = font.num_tables ? Log2Floor(font.num_tables) : 0;
  uint16_t search_range = max_pow2 ? 1 << (max_pow2 + 4) : 0;
  uint16_t range_shift = (font.num_tables << 4) - search_range;
  Store16(search_range, offset, dst);
  Store16(max_pow2, offset, dst);
  Store16(range_shift, offset, dst);

  for (const auto& i : font.tables) {
    if (!WriteTable(i.second, offset, dst, dst_size)) {
      return false;
    }
  }

  return true;
}
bool WriteFontCollection(const FontCollection& font_collection, uint8_t* dst,
size_t dst_size) {
size_t offset = 0;
// It's simpler if this just a simple sfnt
if (font_collection.fonts.size() == 1) {
return WriteFont(font_collection.fonts[0], &offset, dst, dst_size);
}
// Write TTC header
StoreU32(kTtcFontFlavor, &offset, dst);
StoreU32(font_collection.header_version, &offset, dst);
StoreU32(font_collection.fonts.size(), &offset, dst);
// Offset Table, zeroed for now
size_t offset_table = offset; // where to write offsets later
for (int i = 0; i < font_collection.fonts.size(); i++) {
StoreU32(0, &offset, dst);
}
if (font_collection.header_version == 0x00020000) {
StoreU32(0, &offset, dst); // ulDsigTag
StoreU32(0, &offset, dst); // ulDsigLength
StoreU32(0, &offset, dst); // ulDsigOffset
}
// Write fonts and their offsets.
for (int i = 0; i < font_collection.fonts.size(); i++) {
const auto& font = font_collection.fonts[i];
StoreU32(offset, &offset_table, dst);
if (!WriteFont(font, &offset, dst, dst_size)) {
return false;
}
}
return true;
}
// Returns the number of glyphs derived from the size of the loca table
// (number of loca entries minus one). Returns 0 if the head or loca table is
// missing, the head table is shorter than 52 bytes, or the loca table does
// not hold a single entry.
int NumGlyphs(const Font& font) {
  const Font::Table* head_table = font.FindTable(kHeadTableTag);
  const Font::Table* loca_table = font.FindTable(kLocaTableTag);
  if (head_table == NULL || loca_table == NULL || head_table->length < 52) {
    return 0;
  }
  const uint32_t entry_size = IndexFormat(font) == 0 ? 2 : 4;
  const uint32_t num_entries = loca_table->length / entry_size;
  // Without this guard "num_entries - 1" wraps around in unsigned arithmetic
  // and a negative glyph count is reported for an empty loca table.
  if (num_entries == 0) {
    return 0;
  }
  return static_cast<int>(num_entries - 1);
}
// Returns byte 51 of the head table, which the callers in this file treat as
// the loca index format (0 selects 2-byte loca entries, anything else 4-byte
// entries). Returns 0 if there is no head table or it is too short to
// contain that byte.
int IndexFormat(const Font& font) {
  const Font::Table* head_table = font.FindTable(kHeadTableTag);
  // The length check keeps the read below inside the table even when a
  // caller has not validated the head table itself.
  if (head_table == NULL || head_table->length < 52) {
    return 0;
  }
  return head_table->data[51];
}
bool Font::Table::IsReused() const {
return this->reuse_of != NULL;
}
// Looks up glyph |glyph_index| through the loca table and sets |*glyph_data|
// and |*glyph_size| to the matching byte range inside the glyf table.
// Returns false if the index is negative, a required table (head, loca,
// glyf) is missing, the head table is shorter than 52 bytes, or the loca
// entries are unreadable, decreasing or point outside the glyf table.
bool GetGlyphData(const Font& font, int glyph_index,
                  const uint8_t** glyph_data, size_t* glyph_size) {
  if (glyph_index < 0) {
    return FONT_COMPRESSION_FAILURE();
  }
  const Font::Table* head_table = font.FindTable(kHeadTableTag);
  const Font::Table* loca_table = font.FindTable(kLocaTableTag);
  const Font::Table* glyf_table = font.FindTable(kGlyfTableTag);
  if (head_table == NULL || loca_table == NULL || glyf_table == NULL ||
      head_table->length < 52) {
    return FONT_COMPRESSION_FAILURE();
  }
  // Do the entry-offset multiplication in size_t so that a very large index
  // cannot overflow a signed int; an out-of-range skip is then rejected by
  // the buffer instead.
  const size_t index = static_cast<size_t>(glyph_index);

  int index_fmt = IndexFormat(font);

  Buffer loca_buf(loca_table->data, loca_table->length);
  if (index_fmt == 0) {
    // Short format: 16-bit entries holding the byte offset divided by two.
    uint16_t offset1, offset2;
    if (!loca_buf.Skip(2 * index) ||
        !loca_buf.ReadU16(&offset1) ||
        !loca_buf.ReadU16(&offset2) ||
        offset2 < offset1 ||
        2 * offset2 > glyf_table->length) {
      return FONT_COMPRESSION_FAILURE();
    }
    *glyph_data = glyf_table->data + 2 * offset1;
    *glyph_size = 2 * (offset2 - offset1);
  } else {
    // Long format: 32-bit entries holding the byte offset itself.
    uint32_t offset1, offset2;
    if (!loca_buf.Skip(4 * index) ||
        !loca_buf.ReadU32(&offset1) ||
        !loca_buf.ReadU32(&offset2) ||
        offset2 < offset1 ||
        offset2 > glyf_table->length) {
      return FONT_COMPRESSION_FAILURE();
    }
    *glyph_data = glyf_table->data + offset1;
    *glyph_size = offset2 - offset1;
  }
  return true;
}
// Drops the DSIG table from |font| if it has one and refreshes num_tables
// accordingly. Always returns true.
bool RemoveDigitalSignature(Font* font) {
  const size_t erased = font->tables.erase(kDsigTableTag);
  if (erased != 0) {
    font->num_tables = font->tables.size();
  }
  return true;
}
} // namespace woff2

View File

@ -0,0 +1,110 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Data model for a font file in sfnt format, reading and writing functions and
// accessors for the glyph data.
#ifndef WOFF2_FONT_H_
#define WOFF2_FONT_H_
#include <stddef.h>
#include <inttypes.h>
#include <map>
#include <vector>
namespace woff2 {
// Represents an sfnt font file. Only the table directory is parsed, for the
// table data we only store a raw pointer, therefore a font object is valid only
// as long as the data from which it was parsed is around.
struct Font {
// sfnt flavor; written as the first 32-bit field of the font header.
uint32_t flavor;
// Number of tables; written as the 16-bit table count of the font header.
uint16_t num_tables;
// One table of the font: its directory record plus a pointer to its data.
struct Table {
// The four 32-bit fields of the table directory record.
uint32_t tag;
uint32_t checksum;
uint32_t offset;
uint32_t length;
// Raw table bytes (|length| of them). Not owned unless it points into
// |buffer| below.
const uint8_t* data;
// Buffer used to mutate the data before writing out.
std::vector<uint8_t> buffer;
// If we've seen this tag/offset before, pointer to the first time we saw it
// If this is the first time we've seen this table, NULL
// Intended use is to bypass re-processing tables
Font::Table* reuse_of;
// Is this table reused by a TTC
bool IsReused() const;
};
// Tables of this font, keyed by table tag.
std::map<uint32_t, Table> tables;
// Table tags in the order in which the tables are laid out in the output.
std::vector<uint32_t> OutputOrderedTags() const;
// Returns the table with the given tag; callers check the result for NULL.
Table* FindTable(uint32_t tag);
const Table* FindTable(uint32_t tag) const;
};
// Accommodates both singular (OTF, TTF) and collection (TTC) fonts
struct FontCollection {
// Version field of the TTC header; 0x00020000 adds the three DSIG fields
// when the collection is written out.
uint32_t header_version;
// (offset, first use of table*) pairs
std::map<uint32_t, Font::Table*> tables;
// The member fonts. A collection with exactly one font is written and
// normalized as a plain sfnt.
std::vector<Font> fonts;
};
// Parses the font from the given data. Returns false on parsing failure or
// buffer overflow. The font is valid only so long the input data pointer is
// valid. Does NOT support collections.
bool ReadFont(const uint8_t* data, size_t len, Font* font);
// Parses the font from the given data. Returns false on parsing failure or
// buffer overflow. The font is valid only so long the input data pointer is
// valid. Supports collections.
bool ReadFontCollection(const uint8_t* data, size_t len, FontCollection* fonts);
// Returns the file size of the font.
size_t FontFileSize(const Font& font);
size_t FontCollectionFileSize(const FontCollection& font);
// Writes the font into the specified dst buffer. The dst_size should be the
// same as returned by FontFileSize(). Returns false upon buffer overflow (which
// should not happen if dst_size was computed by FontFileSize()).
bool WriteFont(const Font& font, uint8_t* dst, size_t dst_size);
// Write the font at a specific offset
bool WriteFont(const Font& font, size_t* offset, uint8_t* dst, size_t dst_size);
bool WriteFontCollection(const FontCollection& font_collection, uint8_t* dst,
size_t dst_size);
// Returns the number of glyphs in the font.
// NOTE: Currently this works only for TrueType-flavored fonts, will return
// zero for CFF-flavored fonts.
int NumGlyphs(const Font& font);
// Returns the index format of the font
int IndexFormat(const Font& font);
// Sets *glyph_data and *glyph_size to point to the location of the glyph data
// with the given index. Returns false if the glyph is not found.
bool GetGlyphData(const Font& font, int glyph_index,
const uint8_t** glyph_data, size_t* glyph_size);
// Removes the digital signature (DSIG) table
bool RemoveDigitalSignature(Font* font);
} // namespace woff2
#endif // WOFF2_FONT_H_

View File

@ -0,0 +1,380 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Glyph manipulation
#include "./glyph.h"
#include <stdlib.h>
#include <limits>
#include "./buffer.h"
#include "./store_bytes.h"
namespace woff2 {
// Bits of the per-point flag byte of a simple glyph, as read by ReadGlyph()
// and produced by StorePoints().
static const int32_t kFLAG_ONCURVE = 1;
static const int32_t kFLAG_XSHORT = 1 << 1;
static const int32_t kFLAG_YSHORT = 1 << 2;
static const int32_t kFLAG_REPEAT = 1 << 3;
static const int32_t kFLAG_XREPEATSIGN = 1 << 4;
static const int32_t kFLAG_YREPEATSIGN = 1 << 5;
// Bits of the 16-bit per-component flags word of a composite glyph, as
// interpreted by ReadCompositeGlyphData().
static const int32_t kFLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
static const int32_t kFLAG_WE_HAVE_A_SCALE = 1 << 3;
static const int32_t kFLAG_MORE_COMPONENTS = 1 << 5;
static const int32_t kFLAG_WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6;
static const int32_t kFLAG_WE_HAVE_A_TWO_BY_TWO = 1 << 7;
static const int32_t kFLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
// Walks over the component records of a composite glyph without decoding
// them. On success |glyph->composite_data| / |composite_data_size| describe
// the raw component bytes and |glyph->have_instructions| tells whether any
// component carried the WE_HAVE_INSTRUCTIONS flag. Returns false if the
// buffer ends prematurely.
bool ReadCompositeGlyphData(Buffer* buffer, Glyph* glyph) {
  const size_t begin = buffer->offset();
  glyph->have_instructions = false;
  glyph->composite_data = buffer->buffer() + begin;

  uint16_t component_flags = 0;
  do {
    if (!buffer->ReadU16(&component_flags)) {
      return FONT_COMPRESSION_FAILURE();
    }
    if ((component_flags & kFLAG_WE_HAVE_INSTRUCTIONS) != 0) {
      glyph->have_instructions = true;
    }
    // Glyph index (2 bytes) plus two arguments of 1 or 2 bytes each.
    size_t to_skip =
        (component_flags & kFLAG_ARG_1_AND_2_ARE_WORDS) ? 2 + 4 : 2 + 2;
    // Optional transformation: at most one of the three variants applies.
    if (component_flags & kFLAG_WE_HAVE_A_SCALE) {
      to_skip += 2;
    } else if (component_flags & kFLAG_WE_HAVE_AN_X_AND_Y_SCALE) {
      to_skip += 4;
    } else if (component_flags & kFLAG_WE_HAVE_A_TWO_BY_TWO) {
      to_skip += 8;
    }
    if (!buffer->Skip(to_skip)) {
      return FONT_COMPRESSION_FAILURE();
    }
  } while (component_flags & kFLAG_MORE_COMPONENTS);

  const size_t consumed = buffer->offset() - begin;
  if (consumed > std::numeric_limits<uint32_t>::max()) {
    return FONT_COMPRESSION_FAILURE();
  }
  glyph->composite_data_size = static_cast<uint32_t>(consumed);
  return true;
}
// Parses one glyph record of |len| bytes at |data| into |glyph|.
// A positive contour count is parsed as a simple glyph (contours with
// absolute point coordinates plus a pointer to the instructions), a count of
// -1 as a composite glyph (raw component bytes plus optional instructions).
// A count of 0 is accepted as an empty glyph and leaves |glyph| untouched.
// Returns false on any other count or if the data ends prematurely.
// |glyph| keeps pointers into |data|.
bool ReadGlyph(const uint8_t* data, size_t len, Glyph* glyph) {
Buffer buffer(data, len);
int16_t num_contours;
if (!buffer.ReadS16(&num_contours)) {
return FONT_COMPRESSION_FAILURE();
}
if (num_contours == 0) {
// Empty glyph.
return true;
}
// Read the bounding box.
if (!buffer.ReadS16(&glyph->x_min) ||
!buffer.ReadS16(&glyph->y_min) ||
!buffer.ReadS16(&glyph->x_max) ||
!buffer.ReadS16(&glyph->y_max)) {
return FONT_COMPRESSION_FAILURE();
}
if (num_contours > 0) {
// Simple glyph.
glyph->contours.resize(num_contours);
// Read the number of points per contour.
// The stream stores the index of the last point of every contour, so the
// point count is the difference to the previous end index (plus one for the
// first contour, whose points start at index 0).
// NOTE(review): nothing checks that the end indices are non-decreasing; a
// smaller value wraps around in uint16_t arithmetic and yields a very large
// contour.
uint16_t last_point_index = 0;
for (int i = 0; i < num_contours; ++i) {
uint16_t point_index;
if (!buffer.ReadU16(&point_index)) {
return FONT_COMPRESSION_FAILURE();
}
uint16_t num_points = point_index - last_point_index + (i == 0 ? 1 : 0);
glyph->contours[i].resize(num_points);
last_point_index = point_index;
}
// Read the instructions.
if (!buffer.ReadU16(&glyph->instructions_size)) {
return FONT_COMPRESSION_FAILURE();
}
glyph->instructions_data = data + buffer.offset();
if (!buffer.Skip(glyph->instructions_size)) {
return FONT_COMPRESSION_FAILURE();
}
// Read the run-length coded flags.
// A flag byte with kFLAG_REPEAT set is followed by a count byte; the same
// flag then applies to that many further points. The repeat state is carried
// across contour boundaries.
std::vector<std::vector<uint8_t> > flags(num_contours);
uint8_t flag = 0;
uint8_t flag_repeat = 0;
for (int i = 0; i < num_contours; ++i) {
flags[i].resize(glyph->contours[i].size());
for (int j = 0; j < glyph->contours[i].size(); ++j) {
if (flag_repeat == 0) {
if (!buffer.ReadU8(&flag)) {
return FONT_COMPRESSION_FAILURE();
}
if (flag & kFLAG_REPEAT) {
if (!buffer.ReadU8(&flag_repeat)) {
return FONT_COMPRESSION_FAILURE();
}
}
} else {
flag_repeat--;
}
flags[i][j] = flag;
glyph->contours[i][j].on_curve = flag & kFLAG_ONCURVE;
}
}
// Read the x coordinates.
// Coordinates are stored as deltas to the previous point; prev_x accumulates
// them into absolute values and is carried across contours.
int prev_x = 0;
for (int i = 0; i < num_contours; ++i) {
for (int j = 0; j < glyph->contours[i].size(); ++j) {
uint8_t flag = flags[i][j];
if (flag & kFLAG_XSHORT) {
// single byte x-delta coord value
// The byte is the magnitude; kFLAG_XREPEATSIGN set means positive.
uint8_t x_delta;
if (!buffer.ReadU8(&x_delta)) {
return FONT_COMPRESSION_FAILURE();
}
int sign = (flag & kFLAG_XREPEATSIGN) ? 1 : -1;
glyph->contours[i][j].x = prev_x + sign * x_delta;
} else {
// double byte x-delta coord value
// With kFLAG_XREPEATSIGN set nothing is stored and the delta is zero.
int16_t x_delta = 0;
if (!(flag & kFLAG_XREPEATSIGN)) {
if (!buffer.ReadS16(&x_delta)) {
return FONT_COMPRESSION_FAILURE();
}
}
glyph->contours[i][j].x = prev_x + x_delta;
}
prev_x = glyph->contours[i][j].x;
}
}
// Read the y coordinates.
// Same encoding as for x, using the Y variants of the flag bits.
int prev_y = 0;
for (int i = 0; i < num_contours; ++i) {
for (int j = 0; j < glyph->contours[i].size(); ++j) {
uint8_t flag = flags[i][j];
if (flag & kFLAG_YSHORT) {
// single byte y-delta coord value
uint8_t y_delta;
if (!buffer.ReadU8(&y_delta)) {
return FONT_COMPRESSION_FAILURE();
}
int sign = (flag & kFLAG_YREPEATSIGN) ? 1 : -1;
glyph->contours[i][j].y = prev_y + sign * y_delta;
} else {
// double byte y-delta coord value
int16_t y_delta = 0;
if (!(flag & kFLAG_YREPEATSIGN)) {
if (!buffer.ReadS16(&y_delta)) {
return FONT_COMPRESSION_FAILURE();
}
}
glyph->contours[i][j].y = prev_y + y_delta;
}
prev_y = glyph->contours[i][j].y;
}
}
} else if (num_contours == -1) {
// Composite glyph.
if (!ReadCompositeGlyphData(&buffer, glyph)) {
return FONT_COMPRESSION_FAILURE();
}
// Read the instructions.
// They are only present if one of the components announced them.
if (glyph->have_instructions) {
if (!buffer.ReadU16(&glyph->instructions_size)) {
return FONT_COMPRESSION_FAILURE();
}
glyph->instructions_data = data + buffer.offset();
if (!buffer.Skip(glyph->instructions_size)) {
return FONT_COMPRESSION_FAILURE();
}
} else {
glyph->instructions_size = 0;
}
} else {
// Contour counts below -1 are rejected.
return FONT_COMPRESSION_FAILURE();
}
return true;
}
namespace {
// Appends the bounding box of |glyph| as four 16-bit values in the order
// x_min, y_min, x_max, y_max. No bounds checking is done here.
void StoreBbox(const Glyph& glyph, size_t* offset, uint8_t* dst) {
  const int16_t corners[4] = {glyph.x_min, glyph.y_min,
                              glyph.x_max, glyph.y_max};
  for (int k = 0; k < 4; ++k) {
    Store16(corners[k], offset, dst);
  }
}
// Appends the 16-bit instruction length followed by the instruction bytes.
// No bounds checking is done here.
void StoreInstructions(const Glyph& glyph, size_t* offset, uint8_t* dst) {
  const uint16_t byte_count = glyph.instructions_size;
  Store16(byte_count, offset, dst);
  StoreBytes(glyph.instructions_data, byte_count, offset, dst);
}
// Appends, for every contour, the index of its last point as a 16-bit value.
// Returns false if a contour size or a running end index does not fit into
// 16 bits.
bool StoreEndPtsOfContours(const Glyph& glyph, size_t* offset, uint8_t* dst) {
  const int kMaxIndex = std::numeric_limits<uint16_t>::max();
  int last_index = -1;
  for (auto it = glyph.contours.begin(); it != glyph.contours.end(); ++it) {
    const size_t points = it->size();
    last_index += points;
    const bool too_many_points = points > static_cast<size_t>(kMaxIndex);
    if (too_many_points || last_index > kMaxIndex) {
      return FONT_COMPRESSION_FAILURE();
    }
    Store16(last_index, offset, dst);
  }
  return true;
}
// Appends the point data of a simple glyph at |*offset|: first the
// run-length coded flag bytes, then all x deltas, then all y deltas.
// |*offset| is advanced past everything written. Returns false if the data
// does not fit into |dst_size| bytes.
bool StorePoints(const Glyph& glyph, size_t* offset,
uint8_t* dst, size_t dst_size) {
// last_flag starts at -1 so that the first point can never be taken for a
// repetition (flags are non-negative).
int last_flag = -1;
int repeat_count = 0;
int last_x = 0;
int last_y = 0;
size_t x_bytes = 0;
size_t y_bytes = 0;
// Store the flags and calculate the total size of the x and y coordinates.
for (const auto& contour : glyph.contours) {
for (const auto& point : contour) {
int flag = point.on_curve ? kFLAG_ONCURVE : 0;
int dx = point.x - last_x;
int dy = point.y - last_y;
// A zero delta takes no bytes, a delta with magnitude below 256 takes one
// byte (its sign goes into the flag), anything else takes two bytes.
if (dx == 0) {
flag |= kFLAG_XREPEATSIGN;
} else if (dx > -256 && dx < 256) {
flag |= kFLAG_XSHORT | (dx > 0 ? kFLAG_XREPEATSIGN : 0);
x_bytes += 1;
} else {
x_bytes += 2;
}
if (dy == 0) {
flag |= kFLAG_YREPEATSIGN;
} else if (dy > -256 && dy < 256) {
flag |= kFLAG_YSHORT | (dy > 0 ? kFLAG_YREPEATSIGN : 0);
y_bytes += 1;
} else {
y_bytes += 2;
}
// The repeat count is a single byte, hence runs are capped at 255.
if (flag == last_flag && repeat_count != 255) {
// The count of a running repetition is not written until the run ends, so
// the byte before *offset is still the flag byte; mark it as repeated.
dst[*offset - 1] |= kFLAG_REPEAT;
repeat_count++;
} else {
// The run (if any) is over: flush its count, then start a new flag byte.
if (repeat_count != 0) {
if (*offset >= dst_size) {
return FONT_COMPRESSION_FAILURE();
}
dst[(*offset)++] = repeat_count;
}
if (*offset >= dst_size) {
return FONT_COMPRESSION_FAILURE();
}
dst[(*offset)++] = flag;
repeat_count = 0;
}
last_x = point.x;
last_y = point.y;
last_flag = flag;
}
}
// Flush the count of a run that lasted until the final point.
if (repeat_count != 0) {
if (*offset >= dst_size) {
return FONT_COMPRESSION_FAILURE();
}
dst[(*offset)++] = repeat_count;
}
// One check covers every coordinate byte written by the second pass.
if (*offset + x_bytes + y_bytes > dst_size) {
return FONT_COMPRESSION_FAILURE();
}
// Store the x and y coordinates.
// The x block starts right after the flags, the y block right after the x
// block; both are filled in the same pass through separate cursors.
size_t x_offset = *offset;
size_t y_offset = *offset + x_bytes;
last_x = 0;
last_y = 0;
for (const auto& contour : glyph.contours) {
for (const auto& point : contour) {
int dx = point.x - last_x;
int dy = point.y - last_y;
if (dx == 0) {
// pass
} else if (dx > -256 && dx < 256) {
dst[x_offset++] = std::abs(dx);
} else {
Store16(dx, &x_offset, dst);
}
if (dy == 0) {
// pass
} else if (dy > -256 && dy < 256) {
dst[y_offset++] = std::abs(dy);
} else {
Store16(dy, &y_offset, dst);
}
last_x += dx;
last_y += dy;
}
}
// y_offset now points past the last byte written.
*offset = y_offset;
return true;
}
} // namespace
// Serializes |glyph| into |dst|. On entry |*dst_size| is the capacity of
// |dst|; on success it is set to the number of bytes written (0 for a glyph
// that has neither composite data nor contours). Returns false, leaving
// |*dst_size| unchanged, if the glyph does not fit or cannot be encoded.
bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size) {
  size_t written = 0;
  const size_t capacity = *dst_size;
  const size_t num_contours = glyph.contours.size();

  if (glyph.composite_data_size > 0) {
    // Composite glyph: contour count (2) + bbox (8) + raw component data,
    // optionally followed by the instruction length (2) and instructions.
    const unsigned long long header_and_data =
        10ULL + glyph.composite_data_size;
    const unsigned long long instruction_part =
        (glyph.have_instructions ? 2ULL : 0) + glyph.instructions_size;
    if (capacity < header_and_data + instruction_part) {
      return FONT_COMPRESSION_FAILURE();
    }
    Store16(-1, &written, dst);
    StoreBbox(glyph, &written, dst);
    StoreBytes(glyph.composite_data, glyph.composite_data_size, &written,
               dst);
    if (glyph.have_instructions) {
      StoreInstructions(glyph, &written, dst);
    }
  } else if (num_contours > 0) {
    // Simple glyph: contour count (2) + bbox (8) + one end point per contour
    // (2 each) + instruction length (2) + instructions; the point data is
    // bounds-checked by StorePoints itself.
    if (num_contours > std::numeric_limits<int16_t>::max()) {
      return FONT_COMPRESSION_FAILURE();
    }
    if (capacity < ((12ULL + 2 * num_contours) + glyph.instructions_size)) {
      return FONT_COMPRESSION_FAILURE();
    }
    Store16(num_contours, &written, dst);
    StoreBbox(glyph, &written, dst);
    if (!StoreEndPtsOfContours(glyph, &written, dst)) {
      return FONT_COMPRESSION_FAILURE();
    }
    StoreInstructions(glyph, &written, dst);
    if (!StorePoints(glyph, &written, dst, capacity)) {
      return FONT_COMPRESSION_FAILURE();
    }
  }

  *dst_size = written;
  return true;
}
} // namespace woff2

View File

@ -0,0 +1,71 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Data model and I/O for glyph data within sfnt format files for the purpose of
// performing the preprocessing step of the WOFF 2.0 conversion.
#ifndef WOFF2_GLYPH_H_
#define WOFF2_GLYPH_H_
#include <stddef.h>
#include <inttypes.h>
#include <vector>
namespace woff2 {
// Represents a parsed simple or composite glyph. The composite glyph data and
// instructions are un-parsed and we keep only pointers to the raw data,
// therefore the glyph is valid only so long the data from which it was parsed
// is around.
class Glyph {
 public:
  // Creates an empty glyph. Every member is given a defined value because
  // ReadGlyph() returns early for empty glyphs without filling anything in,
  // and the store functions read these members afterwards.
  Glyph()
      : x_min(0),
        x_max(0),
        y_min(0),
        y_max(0),
        instructions_size(0),
        instructions_data(NULL),
        composite_data(NULL),
        composite_data_size(0),
        have_instructions(false) {}

  // Bounding box.
  int16_t x_min;
  int16_t x_max;
  int16_t y_min;
  int16_t y_max;

  // Instructions: |instructions_size| bytes at |instructions_data| (not
  // owned).
  uint16_t instructions_size;
  const uint8_t* instructions_data;

  // Data model for simple glyphs.
  struct Point {
    int x;
    int y;
    bool on_curve;
  };
  std::vector<std::vector<Point> > contours;

  // Data for composite glyphs: |composite_data_size| raw bytes at
  // |composite_data| (not owned). A size of 0 means "not a composite glyph".
  const uint8_t* composite_data;
  uint32_t composite_data_size;
  // Whether any component of the composite glyph announced instructions.
  bool have_instructions;
};
// Parses the glyph from the given data. Returns false on parsing failure or
// buffer overflow. The glyph is valid only so long the input data pointer is
// valid.
bool ReadGlyph(const uint8_t* data, size_t len, Glyph* glyph);
// Stores the glyph into the specified dst buffer. The *dst_size is the buffer
// size on entry and is set to the actual (unpadded) stored size on exit.
// Returns false on buffer overflow.
bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size);
} // namespace woff2
#endif // WOFF2_GLYPH_H_

View File

@ -0,0 +1,319 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Glyph normalization
#include "./normalize.h"
#include <inttypes.h>
#include <stddef.h>
#include "./buffer.h"
#include "./port.h"
#include "./font.h"
#include "./glyph.h"
#include "./round.h"
#include "./store_bytes.h"
#include "./table_tags.h"
#include "./woff2_common.h"
namespace woff2 {
namespace {
// Appends one loca entry: for index format 0 the value halved as 16 bits,
// otherwise the value itself as 32 bits.
void StoreLoca(int index_fmt, uint32_t value, size_t* offset, uint8_t* dst) {
  const bool short_entries = (index_fmt == 0);
  if (!short_entries) {
    StoreU32(value, offset, dst);
    return;
  }
  Store16(value >> 1, offset, dst);
}
} // namespace
namespace {
// Re-encodes every glyph of |font| into the (pre-sized) glyf buffer, padding
// each glyph to a multiple of 4 bytes, and writes the matching loca table in
// the given index format. On success the glyf and loca tables point at their
// buffers and carry the new lengths. Returns false if a glyph cannot be read
// or stored, if the offsets do not fit the index format, or if no glyph data
// was produced at all. The caller must have checked that both tables exist.
bool WriteNormalizedLoca(int index_fmt, int num_glyphs, Font* font) {
  Font::Table* glyf_table = font->FindTable(kGlyfTableTag);
  Font::Table* loca_table = font->FindTable(kLocaTableTag);

  int glyph_sz = index_fmt == 0 ? 2 : 4;
  loca_table->buffer.resize(Round4(num_glyphs + 1) * glyph_sz);
  loca_table->length = (num_glyphs + 1) * glyph_sz;

  // data() rather than &buffer[0]: the buffers may legitimately be empty.
  uint8_t* glyf_dst = glyf_table->buffer.data();
  uint8_t* loca_dst = loca_table->buffer.data();
  const size_t glyf_capacity = glyf_table->buffer.size();
  uint32_t glyf_offset = 0;
  size_t loca_offset = 0;

  for (int i = 0; i < num_glyphs; ++i) {
    StoreLoca(index_fmt, glyf_offset, &loca_offset, loca_dst);
    Glyph glyph;
    const uint8_t* glyph_data;
    size_t glyph_size;
    if (!GetGlyphData(*font, i, &glyph_data, &glyph_size) ||
        (glyph_size > 0 && !ReadGlyph(glyph_data, glyph_size, &glyph))) {
      return FONT_COMPRESSION_FAILURE();
    }
    // Padding the previous glyph may have moved glyf_offset up to 3 bytes
    // past the end of the buffer. Clamp instead of subtracting blindly, so
    // that the remaining capacity never wraps around to a huge value.
    const size_t glyf_start =
        glyf_offset < glyf_capacity ? glyf_offset : glyf_capacity;
    size_t glyf_dst_size = glyf_capacity - glyf_start;
    if (!StoreGlyph(glyph, glyf_dst + glyf_start, &glyf_dst_size)) {
      return FONT_COMPRESSION_FAILURE();
    }
    glyf_dst_size = Round4(glyf_dst_size);
    if (glyf_dst_size > std::numeric_limits<uint32_t>::max() ||
        glyf_offset + static_cast<uint32_t>(glyf_dst_size) < glyf_offset ||
        (index_fmt == 0 && glyf_offset + glyf_dst_size >= (1UL << 17))) {
      return FONT_COMPRESSION_FAILURE();
    }
    glyf_offset += glyf_dst_size;
  }
  // Plain false (not a logged failure): no glyph data was produced.
  if (glyf_offset == 0) {
    return false;
  }

  // Final loca entry: the end of the last glyph.
  StoreLoca(index_fmt, glyf_offset, &loca_offset, loca_dst);

  glyf_table->buffer.resize(glyf_offset);
  glyf_table->data = &glyf_table->buffer[0];
  glyf_table->length = glyf_offset;
  loca_table->data = &loca_table->buffer[0];

  return true;
}
} // namespace
namespace {
// Gives the table with tag |tableTag| a private, writable copy of its data:
// the bytes are copied into the table's own buffer (sized up to a multiple
// of 4, padding zeroed) and |data| is redirected to it. Reused tables are
// left alone. Returns false if the font has no such table.
bool MakeEditableBuffer(Font* font, int tableTag) {
  Font::Table* table = font->FindTable(tableTag);
  if (table == NULL) {
    return FONT_COMPRESSION_FAILURE();
  }
  if (table->IsReused()) {
    return true;
  }
  const size_t padded_size = Round4(table->length);
  // Only |length| bytes are known to be readable at |data|, so copy exactly
  // that many and let the vector constructor zero the padding. Build the
  // copy on the side first: |data| may already point into table->buffer.
  std::vector<uint8_t> editable(padded_size, 0);
  if (table->length > 0) {
    memcpy(&editable[0], table->data, table->length);
  }
  table->buffer.swap(editable);
  table->data = table->buffer.data();
  return true;
}
} // namespace
// Rewrites the glyf and loca tables of |font| in normalized form. Fonts that
// have a CFF table and neither glyf nor loca are left untouched, as are
// fonts whose glyf/loca tables are reused from another font. If the
// normalized data does not fit the short loca format, the long format is
// used and byte 51 of the head buffer is updated to match.
// Returns false if required tables are missing or malformed.
bool NormalizeGlyphs(Font* font) {
  Font::Table* cff_table = font->FindTable(kCffTableTag);
  Font::Table* head_table = font->FindTable(kHeadTableTag);
  Font::Table* glyf_table = font->FindTable(kGlyfTableTag);
  Font::Table* loca_table = font->FindTable(kLocaTableTag);
  if (head_table == NULL) {
    return FONT_COMPRESSION_FAILURE();
  }
  // CFF, no loca, no glyf is OK for CFF. If so, don't normalize.
  if (cff_table != NULL && loca_table == NULL && glyf_table == NULL) {
    return true;
  }
  if (loca_table == NULL || glyf_table == NULL) {
    return FONT_COMPRESSION_FAILURE();
  }
  // Must share neither or both loca & glyf
  if (loca_table->IsReused() != glyf_table->IsReused()) {
    return FONT_COMPRESSION_FAILURE();
  }
  if (loca_table->IsReused()) {
    return true;
  }

  // The index format lives in byte 51 of the head table; make sure that
  // byte exists before reading it.
  if (head_table->length < 52) {
    return FONT_COMPRESSION_FAILURE();
  }
  int index_fmt = head_table->data[51];
  int num_glyphs = NumGlyphs(*font);
  // A negative count would turn the size estimate below negative.
  if (num_glyphs < 0) {
    return FONT_COMPRESSION_FAILURE();
  }

  // We need to allocate a bit more than its original length for the normalized
  // glyf table, since it can happen that the glyphs in the original table are
  // 2-byte aligned, while in the normalized table they are 4-byte aligned.
  // That gives a maximum of 2 bytes increase per glyph. However, there is no
  // theoretical guarantee that the total size of the flags plus the coordinates
  // is the smallest possible in the normalized version, so we have to allow
  // some general overhead.
  // TODO(user) Figure out some more precise upper bound on the size of
  // the overhead.
  size_t max_normalized_glyf_size = 1.1 * glyf_table->length + 2 * num_glyphs;

  glyf_table->buffer.resize(max_normalized_glyf_size);

  // if we can't write a loca using short's (index_fmt 0)
  // try again using longs (index_fmt 1)
  if (!WriteNormalizedLoca(index_fmt, num_glyphs, font)) {
    if (index_fmt != 0) {
      return FONT_COMPRESSION_FAILURE();
    }

    // Rewrite loca with 4-byte entries & update head to match
    index_fmt = 1;
    if (!WriteNormalizedLoca(index_fmt, num_glyphs, font)) {
      return FONT_COMPRESSION_FAILURE();
    }
    // The head buffer only exists if the head table was made editable
    // beforehand; refuse to write outside of it.
    if (head_table->buffer.size() < 52) {
      return FONT_COMPRESSION_FAILURE();
    }
    head_table->buffer[51] = 1;
  }

  return true;
}
// Assigns consecutive offsets to the tables of |font| in output order,
// starting right after the header and table directory and advancing by each
// table's length rounded up to a multiple of 4. Always returns true.
bool NormalizeOffsets(Font* font) {
  uint32_t next_offset = 12 + 16 * font->num_tables;
  const std::vector<uint32_t> ordered_tags = font->OutputOrderedTags();
  for (size_t k = 0; k < ordered_tags.size(); ++k) {
    Font::Table& entry = font->tables[ordered_tags[k]];
    entry.offset = next_offset;
    next_offset += Round4(entry.length);
  }
  return true;
}
namespace {
uint32_t ComputeHeaderChecksum(const Font& font) {
uint32_t checksum = font.flavor;
uint16_t max_pow2 = font.num_tables ? Log2Floor(font.num_tables) : 0;
uint16_t search_range = max_pow2 ? 1 << (max_pow2 + 4) : 0;
uint16_t range_shift = (font.num_tables << 4) - search_range;
checksum += (font.num_tables << 16 | search_range);
checksum += (max_pow2 << 16 | range_shift);
for (const auto& i : font.tables) {
const Font::Table* table = &i.second;
if (table->IsReused()) {
table = table->reuse_of;
}
checksum += table->tag;
checksum += table->checksum;
checksum += table->offset;
checksum += table->length;
}
return checksum;
}
} // namespace
// Recomputes the checksum of every table of |font| and stores the resulting
// whole-file adjustment value in bytes 8..11 of the head table's buffer.
// Returns false if there is no head table, it is shorter than 12 bytes, or
// its editable buffer is too small to hold the adjustment field.
bool FixChecksums(Font* font) {
  Font::Table* head_table = font->FindTable(kHeadTableTag);
  if (head_table == NULL) {
    return FONT_COMPRESSION_FAILURE();
  }
  if (head_table->reuse_of != NULL) {
    head_table = head_table->reuse_of;
  }
  if (head_table->length < 12) {
    return FONT_COMPRESSION_FAILURE();
  }
  // The adjustment is written through the buffer, which is only populated
  // once the head table has been made editable; never write outside of it.
  if (head_table->buffer.size() < 12) {
    return FONT_COMPRESSION_FAILURE();
  }

  uint8_t* head_buf = &head_table->buffer[0];
  // The adjustment field must read as zero while the checksums are computed.
  size_t offset = 8;
  StoreU32(0, &offset, head_buf);
  uint32_t file_checksum = 0;
  for (auto& i : font->tables) {
    Font::Table* table = &i.second;
    if (table->IsReused()) {
      table = table->reuse_of;
    }
    table->checksum = ComputeULongSum(table->data, table->length);
    file_checksum += table->checksum;
  }

  file_checksum += ComputeHeaderChecksum(*font);
  offset = 8;
  StoreU32(0xb1b0afba - file_checksum, &offset, head_buf);

  return true;
}
namespace {
// Sets bit 11 of the head table 'flags' field in the head table's editable
// buffer. Returns false if there is no head table, it is shorter than 17
// bytes, or its editable buffer does not reach byte 16.
bool MarkTransformed(Font* font) {
  Font::Table* head_table = font->FindTable(kHeadTableTag);
  if (head_table == NULL) {
    return FONT_COMPRESSION_FAILURE();
  }
  if (head_table->reuse_of != NULL) {
    head_table = head_table->reuse_of;
  }
  if (head_table->length < 17) {
    return FONT_COMPRESSION_FAILURE();
  }
  // The write below goes through the buffer, which is only populated once
  // the head table has been made editable; never write outside of it.
  if (head_table->buffer.size() < 17) {
    return FONT_COMPRESSION_FAILURE();
  }
  // set bit 11 of head table 'flags' to indicate that font has undergone
  // lossless modifying transform
  // (byte 16 is the high byte of the 16-bit field, so bit 11 is 0x08 there)
  int head_flags = head_table->data[16];
  head_table->buffer[16] = head_flags | 0x08;
  return true;
}
} // namespace
// Runs every normalization step except the checksum fix-up, in order, and
// stops at the first step that fails.
bool NormalizeWithoutFixingChecksums(Font* font) {
  if (!MakeEditableBuffer(font, kHeadTableTag)) {
    return false;
  }
  if (!RemoveDigitalSignature(font)) {
    return false;
  }
  if (!MarkTransformed(font)) {
    return false;
  }
  if (!NormalizeGlyphs(font)) {
    return false;
  }
  return NormalizeOffsets(font);
}
// Fully normalizes a single font: all normalization steps followed by the
// checksum fix-up. Returns false as soon as one of them fails.
bool NormalizeFont(Font* font) {
  if (!NormalizeWithoutFixingChecksums(font)) {
    return false;
  }
  return FixChecksums(font);
}
// Normalizes every font of a collection. A collection with exactly one font
// is handled like a plain font. Otherwise each font is normalized, table
// offsets are laid out after the collection header and all font headers
// (reused tables take over the offset of the table they reuse), and finally
// the checksums of every font are fixed. Returns false on the first failure.
bool NormalizeFontCollection(FontCollection* font_collection) {
  std::vector<Font>& fonts = font_collection->fonts;
  if (fonts.size() == 1) {
    return NormalizeFont(&fonts[0]);
  }

  uint32_t next_offset = CollectionHeaderSize(font_collection->header_version,
                                              fonts.size());
  for (size_t f = 0; f < fonts.size(); ++f) {
    Font& font = fonts[f];
    if (!NormalizeWithoutFixingChecksums(&font)) {
      fprintf(stderr, "Font normalization failed.\n");
      return false;
    }
    next_offset += kSfntHeaderSize + kSfntEntrySize * font.num_tables;
  }

  // Start table offsets after TTC Header and Sfnt Headers
  for (size_t f = 0; f < fonts.size(); ++f) {
    Font& font = fonts[f];
    const std::vector<uint32_t> ordered_tags = font.OutputOrderedTags();
    for (size_t t = 0; t < ordered_tags.size(); ++t) {
      Font::Table& entry = font.tables[ordered_tags[t]];
      if (entry.IsReused()) {
        entry.offset = entry.reuse_of->offset;
        continue;
      }
      entry.offset = next_offset;
      next_offset += Round4(entry.length);
    }
  }

  // Now we can fix the checksums
  for (size_t f = 0; f < fonts.size(); ++f) {
    if (!FixChecksums(&fonts[f])) {
      fprintf(stderr, "Failed to fix checksums\n");
      return false;
    }
  }
  return true;
}
} // namespace woff2

View File

@ -0,0 +1,47 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for normalizing fonts. Since the WOFF 2.0 decoder creates font
// files in normalized form, the WOFF 2.0 conversion is guaranteed to be
// lossless (in a bitwise sense) only for normalized font files.
#ifndef WOFF2_NORMALIZE_H_
#define WOFF2_NORMALIZE_H_
namespace woff2 {
struct Font;
struct FontCollection;
// Changes the offset fields of the table headers so that the data for the
// tables will be written in order of increasing tag values, without any gaps
// other than the 4-byte padding.
bool NormalizeOffsets(Font* font);
// Changes the checksum fields of the table headers and the checksum field of
// the head table so that it matches the current data.
bool FixChecksums(Font* font);
// Parses each of the glyphs in the font and writes them again to the glyf
// table in normalized form, as defined by the StoreGlyph() function. Changes
// the loca table accordingly.
bool NormalizeGlyphs(Font* font);
// Performs all of the normalization steps above.
bool NormalizeFont(Font* font);
bool NormalizeFontCollection(FontCollection* font_collection);
} // namespace woff2
#endif // WOFF2_NORMALIZE_H_

View File

@ -0,0 +1,48 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helper function for bit twiddling
#ifndef WOFF2_PORT_H_
#define WOFF2_PORT_H_
#include <assert.h>
namespace woff2 {
typedef unsigned int uint32;

// Returns the index of the highest set bit of |n| (i.e. floor(log2(n))),
// or -1 if |n| is 0.
inline int Log2Floor(uint32 n) {
  if (n == 0) {
    return -1;
  }
#if defined(__GNUC__)
  return 31 ^ __builtin_clz(n);
#else
  // Binary search over the bit position: try shifts of 16, 8, 4, 2 and 1.
  int result = 0;
  uint32 remaining = n;
  for (int step = 16; step > 0; step >>= 1) {
    const uint32 shifted = remaining >> step;
    if (shifted != 0) {
      remaining = shifted;
      result += step;
    }
  }
  assert(remaining == 1);
  return result;
#endif
}
} // namespace woff2
#endif // WOFF2_PORT_H_

View File

@ -0,0 +1,35 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helper for rounding
#ifndef WOFF2_ROUND_H_
#define WOFF2_ROUND_H_
#include <limits>
namespace woff2 {
// Rounds |value| up to the next multiple of 4. If adding 3 would exceed the
// maximum of T, the value is returned unchanged instead.
template<typename T> T Round4(T value) {
  const bool would_overflow = std::numeric_limits<T>::max() - value < 3;
  return would_overflow ? value : static_cast<T>((value + 3) & ~3);
}
} // namespace woff2
#endif // WOFF2_ROUND_H_

View File

@ -0,0 +1,61 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helper functions for storing integer values into byte streams.
// No bounds checking is performed, that is the responsibility of the caller.
#ifndef WOFF2_STORE_BYTES_H_
#define WOFF2_STORE_BYTES_H_
#include <inttypes.h>
#include <stddef.h>
#include <string.h>
namespace woff2 {
// Writes x in big-endian order to dst[offset .. offset + 3] and returns the
// offset just past the written bytes. No bounds checking is performed.
inline size_t StoreU32(uint8_t* dst, size_t offset, uint32_t x) {
  for (int shift = 24; shift >= 0; shift -= 8) {
    dst[offset++] = x >> shift;
  }
  return offset;
}
// Writes the low 16 bits of x in big-endian order to dst[offset] and
// dst[offset + 1]; returns offset + 2. No bounds checking is performed.
inline size_t Store16(uint8_t* dst, size_t offset, int x) {
  uint8_t* out = dst + offset;
  out[0] = x >> 8;
  out[1] = x;
  return offset + 2;
}
// Writes val in big-endian order starting at dst[*offset] and advances
// *offset by 4. No bounds checking is performed.
inline void StoreU32(uint32_t val, size_t* offset, uint8_t* dst) {
  for (int shift = 24; shift >= 0; shift -= 8) {
    dst[(*offset)++] = val >> shift;
  }
}
// Writes the low 16 bits of val in big-endian order starting at dst[*offset]
// and advances *offset by 2. No bounds checking is performed.
inline void Store16(int val, size_t* offset, uint8_t* dst) {
  const uint8_t high_byte = val >> 8;
  const uint8_t low_byte = val;
  dst[(*offset)++] = high_byte;
  dst[(*offset)++] = low_byte;
}
// Copies len bytes from data to dst starting at dst[*offset] and advances
// *offset by len. No bounds checking is performed.
inline void StoreBytes(const uint8_t* data, size_t len,
                       size_t* offset, uint8_t* dst) {
  uint8_t* target = dst + *offset;
  memcpy(target, data, len);
  *offset += len;
}
} // namespace woff2
#endif // WOFF2_STORE_BYTES_H_

View File

@ -0,0 +1,90 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Font table tags
#include "./table_tags.h"
namespace woff2 {
// Note that the byte order is big-endian, not the same as ots.cc
// Packs four characters into a 32-bit tag, first character in the most
// significant byte. Every argument is parenthesized and widened to uint32_t
// before shifting, so the macro is safe for arbitrary argument expressions
// and never shifts a signed value.
#define TAG(a, b, c, d)                     \
  ((static_cast<uint32_t>(a) << 24) |       \
   (static_cast<uint32_t>(b) << 16) |       \
   (static_cast<uint32_t>(c) << 8) |        \
   static_cast<uint32_t>(d))
// Table tags that have a short index; the position of a tag in this array is
// the index noted in the trailing comment of each entry.
const uint32_t kKnownTags[63] = {
  TAG('c', 'm', 'a', 'p'),  // 0
  TAG('h', 'e', 'a', 'd'),  // 1
  TAG('h', 'h', 'e', 'a'),  // 2
  TAG('h', 'm', 't', 'x'),  // 3
  TAG('m', 'a', 'x', 'p'),  // 4
  TAG('n', 'a', 'm', 'e'),  // 5
  TAG('O', 'S', '/', '2'),  // 6
  TAG('p', 'o', 's', 't'),  // 7
  TAG('c', 'v', 't', ' '),  // 8
  TAG('f', 'p', 'g', 'm'),  // 9
  TAG('g', 'l', 'y', 'f'),  // 10
  TAG('l', 'o', 'c', 'a'),  // 11
  TAG('p', 'r', 'e', 'p'),  // 12
  TAG('C', 'F', 'F', ' '),  // 13
  TAG('V', 'O', 'R', 'G'),  // 14
  TAG('E', 'B', 'D', 'T'),  // 15
  TAG('E', 'B', 'L', 'C'),  // 16
  TAG('g', 'a', 's', 'p'),  // 17
  TAG('h', 'd', 'm', 'x'),  // 18
  TAG('k', 'e', 'r', 'n'),  // 19
  TAG('L', 'T', 'S', 'H'),  // 20
  TAG('P', 'C', 'L', 'T'),  // 21
  TAG('V', 'D', 'M', 'X'),  // 22
  TAG('v', 'h', 'e', 'a'),  // 23
  TAG('v', 'm', 't', 'x'),  // 24
  TAG('B', 'A', 'S', 'E'),  // 25
  TAG('G', 'D', 'E', 'F'),  // 26
  TAG('G', 'P', 'O', 'S'),  // 27
  TAG('G', 'S', 'U', 'B'),  // 28
  TAG('E', 'B', 'S', 'C'),  // 29
  TAG('J', 'S', 'T', 'F'),  // 30
  TAG('M', 'A', 'T', 'H'),  // 31
  TAG('C', 'B', 'D', 'T'),  // 32
  TAG('C', 'B', 'L', 'C'),  // 33
  TAG('C', 'O', 'L', 'R'),  // 34
  TAG('C', 'P', 'A', 'L'),  // 35
  TAG('S', 'V', 'G', ' '),  // 36
  TAG('s', 'b', 'i', 'x'),  // 37
  TAG('a', 'c', 'n', 't'),  // 38
  TAG('a', 'v', 'a', 'r'),  // 39
  TAG('b', 'd', 'a', 't'),  // 40
  TAG('b', 'l', 'o', 'c'),  // 41
  TAG('b', 's', 'l', 'n'),  // 42
  TAG('c', 'v', 'a', 'r'),  // 43
  TAG('f', 'd', 's', 'c'),  // 44
  TAG('f', 'e', 'a', 't'),  // 45
  TAG('f', 'm', 't', 'x'),  // 46
  TAG('f', 'v', 'a', 'r'),  // 47
  TAG('g', 'v', 'a', 'r'),  // 48
  TAG('h', 's', 't', 'y'),  // 49
  TAG('j', 'u', 's', 't'),  // 50
  TAG('l', 'c', 'a', 'r'),  // 51
  TAG('m', 'o', 'r', 't'),  // 52
  TAG('m', 'o', 'r', 'x'),  // 53
  TAG('o', 'p', 'b', 'd'),  // 54
  TAG('p', 'r', 'o', 'p'),  // 55
  TAG('t', 'r', 'a', 'k'),  // 56
  TAG('Z', 'a', 'p', 'f'),  // 57
  TAG('S', 'i', 'l', 'f'),  // 58
  TAG('G', 'l', 'a', 't'),  // 59
  TAG('G', 'l', 'o', 'c'),  // 60
  TAG('F', 'e', 'a', 't'),  // 61
  TAG('S', 'i', 'l', 'l'),  // 62
};
// The helper macro is only needed to build the table above.
#undef TAG
} // namespace woff2

View File

@ -0,0 +1,35 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Font table tags
#ifndef WOFF2_TABLE_TAGS_H_
#define WOFF2_TABLE_TAGS_H_
#include <inttypes.h>
namespace woff2 {
// Tags of popular tables. Each value is the four ASCII characters of the tag
// packed big-endian (first character in the most significant byte).
static const uint32_t kGlyfTableTag = 0x676c7966;  // 'glyf'
static const uint32_t kHeadTableTag = 0x68656164;  // 'head'
static const uint32_t kLocaTableTag = 0x6c6f6361;  // 'loca'
static const uint32_t kDsigTableTag = 0x44534947;  // 'DSIG'
static const uint32_t kCffTableTag = 0x43464620;   // 'CFF '
// Table of known tags. The bound is not part of this declaration, so users
// must know the element count themselves.
extern const uint32_t kKnownTags[];
} // namespace woff2
#endif // WOFF2_TABLE_TAGS_H_

View File

@ -0,0 +1,288 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
#include "./transform.h"
#include <complex> // for std::abs
#include "./buffer.h"
#include "./font.h"
#include "./glyph.h"
#include "./table_tags.h"
#include "./variable_length.h"
namespace woff2 {
namespace {
// Bit masks (bit 0 and bit 8).
// NOTE(review): judging by the names these are composite-glyph flag bits;
// neither constant is referenced by the code visible in this file - confirm
// whether they can be removed.
const int FLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
const int FLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
// Appends len bytes starting at data to the end of *out. A zero length leaves
// *out untouched (and data is not accessed).
void WriteBytes(std::vector<uint8_t>* out, const uint8_t* data, size_t len) {
  if (len > 0) {
    const size_t old_size = out->size();
    out->resize(old_size + len);
    memcpy(out->data() + old_size, data, len);
  }
}
// Appends the whole content of `in` to the end of *out.
// Uses a single range insert instead of an int-indexed push_back loop, which
// avoids the signed/unsigned index comparison and repeated reallocation.
// `in` must not be the vector that `out` points to.
void WriteBytes(std::vector<uint8_t>* out, const std::vector<uint8_t>& in) {
  out->insert(out->end(), in.begin(), in.end());
}
// Appends the low 16 bits of value to *out, most significant byte first.
void WriteUShort(std::vector<uint8_t>* out, int value) {
  const uint8_t high_byte = value >> 8;
  const uint8_t low_byte = value & 255;
  out->push_back(high_byte);
  out->push_back(low_byte);
}
// Appends the 32 bits of value to *out, most significant byte first.
void WriteLong(std::vector<uint8_t>* out, int value) {
  for (int shift = 24; shift >= 0; shift -= 8) {
    out->push_back((value >> shift) & 255);
  }
}
// Glyf table preprocessing, based on
// GlyfEncoder.java
//
// Collects the data of every encoded glyph into separate byte streams
// (contour counts, point counts, flag bytes, coordinate/length bytes,
// composite data, bounding boxes and instructions). GetTransformedGlyfBytes()
// serializes a header followed by all streams.
class GlyfEncoder {
public:
// Remembers the glyph count and sizes the bbox bitmap to one bit per glyph,
// rounded up to a whole number of 32-bit words (size expressed in bytes).
explicit GlyfEncoder(int num_glyphs)
: n_glyphs_(num_glyphs) {
bbox_bitmap_.resize(((num_glyphs + 31) >> 5) << 2);
}
// Appends one glyph to the streams. Glyphs with composite data are written as
// composite glyphs, glyphs with at least one contour as simple glyphs; any
// other glyph only records a contour count of 0. Always returns true.
bool Encode(int glyph_id, const Glyph& glyph) {
if (glyph.composite_data_size > 0) {
WriteCompositeGlyph(glyph_id, glyph);
} else if (glyph.contours.size() > 0) {
WriteSimpleGlyph(glyph_id, glyph);
} else {
WriteUShort(&n_contour_stream_, 0);
}
return true;
}
// Appends the serialized form to *result: a 4-byte version (0), the 2-byte
// glyph count, a 2-byte index_format placeholder (0), seven 4-byte stream
// sizes, and then the streams themselves in the same order. The bbox size
// field covers the bitmap plus the bbox stream.
void GetTransformedGlyfBytes(std::vector<uint8_t>* result) {
WriteLong(result, 0); // version
WriteUShort(result, n_glyphs_);
WriteUShort(result, 0); // index_format, will be set later
WriteLong(result, n_contour_stream_.size());
WriteLong(result, n_points_stream_.size());
WriteLong(result, flag_byte_stream_.size());
WriteLong(result, glyph_stream_.size());
WriteLong(result, composite_stream_.size());
WriteLong(result, bbox_bitmap_.size() + bbox_stream_.size());
WriteLong(result, instruction_stream_.size());
WriteBytes(result, n_contour_stream_);
WriteBytes(result, n_points_stream_);
WriteBytes(result, flag_byte_stream_);
WriteBytes(result, glyph_stream_);
WriteBytes(result, composite_stream_);
WriteBytes(result, bbox_bitmap_);
WriteBytes(result, bbox_stream_);
WriteBytes(result, instruction_stream_);
}
private:
// Writes the instruction length (255UShort) to the glyph stream and the
// instruction bytes to the instruction stream.
void WriteInstructions(const Glyph& glyph) {
Write255UShort(&glyph_stream_, glyph.instructions_size);
WriteBytes(&instruction_stream_,
glyph.instructions_data, glyph.instructions_size);
}
// Returns true when the bounding box stored in the glyph differs from the
// one computed from its points. Without any point in the first contour, it
// returns true if any stored bbox value is non-zero.
bool ShouldWriteSimpleGlyphBbox(const Glyph& glyph) {
if (glyph.contours.empty() || glyph.contours[0].empty()) {
return glyph.x_min || glyph.y_min || glyph.x_max || glyph.y_max;
}
// Seed the min/max values with the first point, then scan all points.
int16_t x_min = glyph.contours[0][0].x;
int16_t y_min = glyph.contours[0][0].y;
int16_t x_max = x_min;
int16_t y_max = y_min;
for (const auto& contour : glyph.contours) {
for (const auto& point : contour) {
if (point.x < x_min) x_min = point.x;
if (point.x > x_max) x_max = point.x;
if (point.y < y_min) y_min = point.y;
if (point.y > y_max) y_max = point.y;
}
}
if (glyph.x_min != x_min)
return true;
if (glyph.y_min != y_min)
return true;
if (glyph.x_max != x_max)
return true;
if (glyph.y_max != y_max)
return true;
return false;
}
// Writes a simple glyph: the contour count, an explicit bbox only when it
// cannot be recomputed from the points, the point count of each contour, one
// triplet per point (coordinates as deltas to the previous point, starting
// from (0, 0)), and finally the instructions.
void WriteSimpleGlyph(int glyph_id, const Glyph& glyph) {
int num_contours = glyph.contours.size();
WriteUShort(&n_contour_stream_, num_contours);
if (ShouldWriteSimpleGlyphBbox(glyph)) {
WriteBbox(glyph_id, glyph);
}
for (int i = 0; i < num_contours; i++) {
Write255UShort(&n_points_stream_, glyph.contours[i].size());
}
int lastX = 0;
int lastY = 0;
for (int i = 0; i < num_contours; i++) {
int num_points = glyph.contours[i].size();
for (int j = 0; j < num_points; j++) {
int x = glyph.contours[i][j].x;
int y = glyph.contours[i][j].y;
int dx = x - lastX;
int dy = y - lastY;
WriteTriplet(glyph.contours[i][j].on_curve, dx, dy);
lastX = x;
lastY = y;
}
}
if (num_contours > 0) {
WriteInstructions(glyph);
}
}
// Writes a composite glyph: a contour count of -1 (bytes 0xFF 0xFF), the
// bbox (always), the raw composite data, and the instructions when the glyph
// has any.
void WriteCompositeGlyph(int glyph_id, const Glyph& glyph) {
WriteUShort(&n_contour_stream_, -1);
WriteBbox(glyph_id, glyph);
WriteBytes(&composite_stream_,
glyph.composite_data,
glyph.composite_data_size);
if (glyph.have_instructions) {
WriteInstructions(glyph);
}
}
// Marks the glyph in the bbox bitmap (most significant bit of each byte
// first) and appends its four bbox values to the bbox stream.
void WriteBbox(int glyph_id, const Glyph& glyph) {
bbox_bitmap_[glyph_id >> 3] |= 0x80 >> (glyph_id & 7);
WriteUShort(&bbox_stream_, glyph.x_min);
WriteUShort(&bbox_stream_, glyph.y_min);
WriteUShort(&bbox_stream_, glyph.x_max);
WriteUShort(&bbox_stream_, glyph.y_max);
}
// Encodes one point delta as a flag byte (flag stream) plus 1 to 4 data
// bytes (glyph stream). Bit 7 of the flag is set for off-curve points; the
// sign bits are 1 for non-negative deltas. The remaining flag value selects
// the layout of the data bytes, chosen by the magnitude of the deltas.
void WriteTriplet(bool on_curve, int x, int y) {
int abs_x = std::abs(x);
int abs_y = std::abs(y);
int on_curve_bit = on_curve ? 0 : 128;
int x_sign_bit = (x < 0) ? 0 : 1;
int y_sign_bit = (y < 0) ? 0 : 1;
int xy_sign_bits = x_sign_bit + 2 * y_sign_bit;
if (x == 0 && abs_y < 1280) {
// dx is zero: flags 0..9, one data byte holding the low 8 bits of |dy|.
flag_byte_stream_.push_back(on_curve_bit +
((abs_y & 0xf00) >> 7) + y_sign_bit);
glyph_stream_.push_back(abs_y & 0xff);
} else if (y == 0 && abs_x < 1280) {
// dy is zero: flags 10..19, one data byte holding the low 8 bits of |dx|.
flag_byte_stream_.push_back(on_curve_bit + 10 +
((abs_x & 0xf00) >> 7) + x_sign_bit);
glyph_stream_.push_back(abs_x & 0xff);
} else if (abs_x < 65 && abs_y < 65) {
// Both small: flags from 20, one data byte with a 4-bit nibble per axis.
flag_byte_stream_.push_back(on_curve_bit + 20 +
((abs_x - 1) & 0x30) +
(((abs_y - 1) & 0x30) >> 2) +
xy_sign_bits);
glyph_stream_.push_back((((abs_x - 1) & 0xf) << 4) | ((abs_y - 1) & 0xf));
} else if (abs_x < 769 && abs_y < 769) {
// Flags from 84, one data byte per axis.
flag_byte_stream_.push_back(on_curve_bit + 84 +
12 * (((abs_x - 1) & 0x300) >> 8) +
(((abs_y - 1) & 0x300) >> 6) + xy_sign_bits);
glyph_stream_.push_back((abs_x - 1) & 0xff);
glyph_stream_.push_back((abs_y - 1) & 0xff);
} else if (abs_x < 4096 && abs_y < 4096) {
// Flags from 120, three data bytes holding 12 bits per axis.
flag_byte_stream_.push_back(on_curve_bit + 120 + xy_sign_bits);
glyph_stream_.push_back(abs_x >> 4);
glyph_stream_.push_back(((abs_x & 0xf) << 4) | (abs_y >> 8));
glyph_stream_.push_back(abs_y & 0xff);
} else {
// Flags from 124, four data bytes holding 16 bits per axis.
flag_byte_stream_.push_back(on_curve_bit + 124 + xy_sign_bits);
glyph_stream_.push_back(abs_x >> 8);
glyph_stream_.push_back(abs_x & 0xff);
glyph_stream_.push_back(abs_y >> 8);
glyph_stream_.push_back(abs_y & 0xff);
}
}
// Output streams, filled glyph by glyph.
std::vector<uint8_t> n_contour_stream_;
std::vector<uint8_t> n_points_stream_;
std::vector<uint8_t> flag_byte_stream_;
std::vector<uint8_t> composite_stream_;
std::vector<uint8_t> bbox_bitmap_;
std::vector<uint8_t> bbox_stream_;
std::vector<uint8_t> glyph_stream_;
std::vector<uint8_t> instruction_stream_;
// Glyph count passed to the constructor; written into the header.
int n_glyphs_;
};
} // namespace
// Adds transformed versions of the glyf and loca tables to `font`, stored
// under the original tags XOR 0x80808080. The transformed loca table has zero
// length and no data.
//
// Returns true on success, and also when there is nothing to do (the font has
// neither a glyf nor a loca table, e.g. a CFF font, or both tables are
// reused). Returns FONT_COMPRESSION_FAILURE() when only one of glyf/loca is
// present, when their reuse state differs, when a glyph cannot be read, or
// when the head table is missing or shorter than 52 bytes.
bool TransformGlyfAndLocaTables(Font* font) {
  const Font::Table* glyf_table = font->FindTable(kGlyfTableTag);
  const Font::Table* loca_table = font->FindTable(kLocaTableTag);

  // Nothing to transform without glyf/loca (this covers CFF fonts).
  if (glyf_table == NULL && loca_table == NULL) {
    return true;
  }
  // The two tables are only usable as a pair; reject a font that has just one
  // of them instead of dereferencing a null pointer below.
  if (glyf_table == NULL || loca_table == NULL) {
    return FONT_COMPRESSION_FAILURE();
  }
  // Must share neither or both loca/glyf
  if (glyf_table->IsReused() != loca_table->IsReused()) {
    return FONT_COMPRESSION_FAILURE();
  }
  if (glyf_table->IsReused()) {
    return true;
  }

  Font::Table* transformed_glyf = &font->tables[kGlyfTableTag ^ 0x80808080];
  Font::Table* transformed_loca = &font->tables[kLocaTableTag ^ 0x80808080];

  int num_glyphs = NumGlyphs(*font);
  GlyfEncoder encoder(num_glyphs);
  for (int i = 0; i < num_glyphs; ++i) {
    Glyph glyph;
    const uint8_t* glyph_data;
    size_t glyph_size;
    // An empty glyph (size 0) is encoded from the default-constructed Glyph.
    if (!GetGlyphData(*font, i, &glyph_data, &glyph_size) ||
        (glyph_size > 0 && !ReadGlyph(glyph_data, glyph_size, &glyph))) {
      return FONT_COMPRESSION_FAILURE();
    }
    encoder.Encode(i, glyph);
  }
  encoder.GetTransformedGlyfBytes(&transformed_glyf->buffer);

  const Font::Table* head_table = font->FindTable(kHeadTableTag);
  if (head_table == NULL || head_table->length < 52) {
    return FONT_COMPRESSION_FAILURE();
  }
  // Patch the index_format placeholder (low byte of the 16-bit field at
  // offset 6) with byte 51 of the head table.
  transformed_glyf->buffer[7] = head_table->data[51];  // index_format

  transformed_glyf->tag = kGlyfTableTag ^ 0x80808080;
  transformed_glyf->length = transformed_glyf->buffer.size();
  transformed_glyf->data = transformed_glyf->buffer.data();

  transformed_loca->tag = kLocaTableTag ^ 0x80808080;
  transformed_loca->length = 0;
  transformed_loca->data = NULL;
  return true;
}
} // namespace woff2

View File

@ -0,0 +1,31 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
#ifndef WOFF2_TRANSFORM_H_
#define WOFF2_TRANSFORM_H_
#include "./font.h"
namespace woff2 {
// Adds the transformed versions of the glyf and loca tables to the font. The
// transformed loca table has zero length. The tag of the transformed tables is
// derived from the original tag by flipping the MSBs of every byte
// (tag ^ 0x80808080).
// Returns true on success or when no transformation is needed.
// NOTE(review): failures are reported through FONT_COMPRESSION_FAILURE(),
// presumably false - confirm against its definition.
bool TransformGlyfAndLocaTables(Font* font);
} // namespace woff2
#endif // WOFF2_TRANSFORM_H_

View File

@ -0,0 +1,133 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helper functions for woff2 variable length types: 255UInt16 and UIntBase128
#include "./variable_length.h"
namespace woff2 {
// Returns the number of bytes (1, 2 or 3) that the 255UShort encoding of
// value occupies.
size_t Size255UShort(uint16_t value) {
  if (value < 253) {
    return 1;
  }
  if (value < 762) {
    return 2;
  }
  return 3;
}
// Appends the 255UShort encoding of value to *out:
//   value < 253        -> one byte: value
//   253 <= value < 506 -> 255, value - 253
//   506 <= value < 762 -> 254, value - 506
//   otherwise          -> 253, high byte, low byte
void Write255UShort(std::vector<uint8_t>* out, int value) {
  if (value >= 762) {
    out->push_back(253);
    out->push_back(value >> 8);
    out->push_back(value & 0xff);
    return;
  }
  if (value >= 506) {
    out->push_back(254);
    out->push_back(value - 506);
    return;
  }
  if (value >= 253) {
    out->push_back(255);
    out->push_back(value - 253);
    return;
  }
  out->push_back(value);
}
// Writes the 255UShort encoding of val to dst starting at dst[*offset] and
// advances *offset past the written bytes. No bounds checking is performed.
void Store255UShort(int val, size_t* offset, uint8_t* dst) {
  std::vector<uint8_t> encoded;
  Write255UShort(&encoded, val);
  for (size_t i = 0; i < encoded.size(); ++i) {
    dst[(*offset)++] = encoded[i];
  }
}
// Based on section 6.1.1 of MicroType Express draft spec
//
// Reads one 255UShort value from buf into *value. The first byte either is
// the value itself (below 253) or selects the layout: 253 is followed by a
// 16-bit value, 255 by one byte to which 253 is added, 254 by one byte to
// which 506 is added. Returns true on success; a failed read returns
// FONT_COMPRESSION_FAILURE().
bool Read255UShort(Buffer* buf, unsigned int* value) {
  static const int kWordCode = 253;
  static const int kOneMoreByteCode2 = 254;
  static const int kOneMoreByteCode1 = 255;
  static const int kLowestUCode = 253;
  uint8_t code = 0;
  if (!buf->ReadU8(&code)) {
    return FONT_COMPRESSION_FAILURE();
  }
  switch (code) {
    case kWordCode: {
      uint16_t word = 0;
      if (!buf->ReadU16(&word)) {
        return FONT_COMPRESSION_FAILURE();
      }
      *value = word;
      return true;
    }
    case kOneMoreByteCode1:
    case kOneMoreByteCode2: {
      uint8_t extra = 0;
      if (!buf->ReadU8(&extra)) {
        return FONT_COMPRESSION_FAILURE();
      }
      const int base =
          (code == kOneMoreByteCode1) ? kLowestUCode : kLowestUCode * 2;
      *value = extra + base;
      return true;
    }
    default:
      *value = code;
      return true;
  }
}
// Reads one UIntBase128 value from buf into *value. The value is stored as up
// to five bytes, most significant 7-bit group first; bit 7 of a byte signals
// that another byte follows.
// Returns true on success. Returns FONT_COMPRESSION_FAILURE() when a read
// fails, when the encoding starts with a leading zero group (first byte
// 0x80), when the value would overflow 32 bits, or when the fifth byte still
// has its continuation bit set. *value is only written on success.
bool ReadBase128(Buffer* buf, uint32_t* value) {
  uint32_t result = 0;
  for (size_t i = 0; i < 5; ++i) {
    uint8_t code = 0;
    if (!buf->ReadU8(&code)) {
      return FONT_COMPRESSION_FAILURE();
    }
    // Leading zeros are invalid: a first byte of 0x80 contributes no value
    // bits and only extends the encoding.
    if (i == 0 && code == 0x80) {
      return FONT_COMPRESSION_FAILURE();
    }
    // If any of the top seven bits are set then we're about to overflow.
    if (result & 0xfe000000) {
      return FONT_COMPRESSION_FAILURE();
    }
    result = (result << 7) | (code & 0x7f);
    if ((code & 0x80) == 0) {
      *value = result;
      return true;
    }
  }
  // Make sure not to exceed the size bound
  return FONT_COMPRESSION_FAILURE();
}
// Returns the number of 7-bit groups (and therefore bytes) needed to encode
// n as UIntBase128; at least 1.
size_t Base128Size(size_t n) {
  size_t num_bytes = 1;
  while (n >= 128) {
    n >>= 7;
    ++num_bytes;
  }
  return num_bytes;
}
// Writes len as UIntBase128 to dst starting at dst[*offset], most significant
// 7-bit group first, and advances *offset past the written bytes. Every byte
// except the last has its continuation bit (0x80) set. No bounds checking is
// performed.
void StoreBase128(size_t len, size_t* offset, uint8_t* dst) {
  const size_t num_bytes = Base128Size(len);
  for (size_t remaining = num_bytes; remaining > 0; --remaining) {
    int group = static_cast<int>((len >> (7 * (remaining - 1))) & 0x7f);
    if (remaining > 1) {
      group |= 0x80;
    }
    dst[(*offset)++] = group;
  }
}
} // namespace woff2

View File

@ -0,0 +1,38 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helper functions for woff2 variable length types: 255UInt16 and UIntBase128
#ifndef WOFF2_VARIABLE_LENGTH_H_
#define WOFF2_VARIABLE_LENGTH_H_
#include <inttypes.h>
#include <vector>
#include "./buffer.h"
namespace woff2 {
// Number of bytes (1, 2 or 3) used by the 255UShort encoding of value.
size_t Size255UShort(uint16_t value);
// Reads one 255UShort value from buf into *value; returns true on success.
bool Read255UShort(Buffer* buf, unsigned int* value);
// Appends the 255UShort encoding of value to *out.
void Write255UShort(std::vector<uint8_t>* out, int value);
// Writes the 255UShort encoding of val at dst[*offset] and advances *offset.
// No bounds checking is performed.
void Store255UShort(int val, size_t* offset, uint8_t* dst);
// Number of bytes needed to encode n as UIntBase128 (at least 1).
size_t Base128Size(size_t n);
// Reads one UIntBase128 value (at most 5 bytes) from buf into *value; returns
// true on success and fails on read errors or 32-bit overflow.
bool ReadBase128(Buffer* buf, uint32_t* value);
// Writes len as UIntBase128 at dst[*offset] and advances *offset.
// No bounds checking is performed.
void StoreBase128(size_t len, size_t* offset, uint8_t* dst);
} // namespace woff2
#endif // WOFF2_VARIABLE_LENGTH_H_

View File

@ -0,0 +1,46 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helpers common across multiple parts of woff2
#include <algorithm>
#include "./woff2_common.h"
namespace woff2 {
// Computes the checksum over size bytes of buf: the sum, modulo 2^32, of the
// data read as big-endian 32-bit words. When size is not a multiple of 4 the
// last partial word is treated as if it were padded with zero bytes, so the
// function never reads past buf[size - 1].
uint32_t ComputeULongSum(const uint8_t* buf, size_t size) {
  uint32_t checksum = 0;
  const size_t aligned_size = size & ~static_cast<size_t>(3);
  for (size_t i = 0; i < aligned_size; i += 4) {
    // Widen before shifting so no signed int is shifted into its sign bit.
    // The addition wraps mod 2^32 because the operands are unsigned.
    checksum += (static_cast<uint32_t>(buf[i]) << 24) |
                (static_cast<uint32_t>(buf[i + 1]) << 16) |
                (static_cast<uint32_t>(buf[i + 2]) << 8) |
                static_cast<uint32_t>(buf[i + 3]);
  }
  if (size != aligned_size) {
    // Trailing 1-3 bytes: place them in the high bytes of a final word.
    uint32_t tail = 0;
    for (size_t i = aligned_size; i < size; ++i) {
      tail |= static_cast<uint32_t>(buf[i]) << (24 - 8 * (i & 3));
    }
    checksum += tail;
  }
  return checksum;
}
// Returns the size in bytes of a collection header with num_fonts fonts:
// 12 bytes (TTCTag, Version, numFonts) plus 4 bytes per font for versions
// 0x00010000 and 0x00020000, plus 12 more bytes for the DSIG fields of
// version 0x00020000. Any other version yields 0.
size_t CollectionHeaderSize(uint32_t header_version, uint32_t num_fonts) {
  const bool is_version_1 = header_version == 0x00010000;
  const bool is_version_2 = header_version == 0x00020000;
  if (!is_version_1 && !is_version_2) {
    return 0;
  }
  // ulDsig{Tag,Length,Offset} only exist in version 2 headers.
  size_t size = is_version_2 ? 12 : 0;
  size += 12               // TTCTag, Version, numFonts
      + 4 * num_fonts;     // OffsetTable[numFonts]
  return size;
}
} // namespace woff2

View File

@ -0,0 +1,72 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Common definition for WOFF2 encoding/decoding
#ifndef WOFF2_WOFF2_COMMON_H_
#define WOFF2_WOFF2_COMMON_H_
#include <stddef.h>
#include <inttypes.h>
#include <string>
namespace woff2 {
// Signature value of a WOFF2 file.
static const uint32_t kWoff2Signature = 0x774f4632; // "wOF2"
// Table flag: the encoder sets it on every table after the first one, whose
// compressed data is part of the first table's stream.
const unsigned int kWoff2FlagsContinueStream = 1 << 4;
// Table flag: the table has a transformed version and therefore carries a
// separate transform length.
const unsigned int kWoff2FlagsTransform = 1 << 5;
// TrueType Collection ID string: 'ttcf'
static const uint32_t kTtcFontFlavor = 0x74746366;
// Size in bytes of the sfnt header and of one sfnt table directory entry.
static const size_t kSfntHeaderSize = 12;
static const size_t kSfntEntrySize = 16;
// A point with integer coordinates and an on-curve marker.
// NOTE(review): presumably a glyph outline point - confirm against the code
// that uses this struct.
struct Point {
int x;
int y;
bool on_curve;
};
// Description of one table while converting to or from WOFF2.
// Tables order by their tag (see operator<).
struct Table {
// Four-character table tag packed into 32 bits.
uint32_t tag;
// Combination of the kWoff2Flags* bits.
uint32_t flags;
// NOTE(review): presumably offset/length of the table in the source font;
// the encoder sets src_length to the source table length - confirm offset.
uint32_t src_offset;
uint32_t src_length;
// Length of the transformed table, or the source length when the table has
// no transformed version.
uint32_t transform_length;
// NOTE(review): presumably offset/length/data of the table in the output;
// the encoder stores the compressed stream in the first table only and uses
// length 0 / NULL data for all others - confirm dst_offset usage.
uint32_t dst_offset;
uint32_t dst_length;
const uint8_t* dst_data;
// Orders tables by ascending tag value.
bool operator<(const Table& other) const {
return tag < other.tag;
}
};
// Size of the collection header. 0 if version indicates this isn't a
// collection. Ref http://www.microsoft.com/typography/otspec/otff.htm,
// True Type Collections
size_t CollectionHeaderSize(uint32_t header_version, uint32_t num_fonts);
// Compute checksum over size bytes of buf
uint32_t ComputeULongSum(const uint8_t* buf, size_t size);
} // namespace woff2
#endif // WOFF2_WOFF2_COMMON_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,36 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Library for converting WOFF2 format font files to their TTF versions.
#ifndef WOFF2_WOFF2_DEC_H_
#define WOFF2_WOFF2_DEC_H_
#include <stddef.h>
#include <inttypes.h>
namespace woff2 {
// Compute the size of the final uncompressed font, or 0 on error.
size_t ComputeWOFF2FinalSize(const uint8_t *data, size_t length);
// Decompresses the font into the target buffer. The result_length should
// be the same as determined by ComputeWOFF2FinalSize(). Returns true on
// successful decompression.
bool ConvertWOFF2ToTTF(uint8_t *result, size_t result_length,
const uint8_t *data, size_t length);
} // namespace woff2
#endif // WOFF2_WOFF2_DEC_H_

View File

@ -0,0 +1,443 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Library for converting TTF format font files to their WOFF2 versions.
#include "./woff2_enc.h"
#include <stdlib.h>
#include <complex>
#include <cstring>
#include <limits>
#include <string>
#include <vector>
#include "./buffer.h"
#include "./../enc/encode.h"
#include "./font.h"
#include "./normalize.h"
#include "./round.h"
#include "./store_bytes.h"
#include "./table_tags.h"
#include "./transform.h"
#include "./variable_length.h"
#include "./woff2_common.h"
namespace woff2 {
namespace {
using std::string;
using std::vector;
// Size in bytes of the fixed WOFF2 header; the starting value of the total
// computed by ComputeWoff2Length().
const size_t kWoff2HeaderSize = 48;
// NOTE(review): not referenced in the part of this file visible here;
// presumably the size of a WOFF2 table directory entry - confirm usage.
const size_t kWoff2EntrySize = 20;
// Compresses len bytes of data into result using brotli in the given mode.
// On entry *result_len is the capacity of result; on success it is replaced
// by the compressed size and true is returned. Returns false (leaving
// *result_len untouched) when BrotliCompressBuffer reports 0.
bool Compress(const uint8_t* data, const size_t len,
              uint8_t* result, uint32_t* result_len,
              brotli::BrotliParams::Mode mode) {
  brotli::BrotliParams params;
  params.mode = mode;
  // The brotli API takes the buffer size as size_t, so go through a local.
  size_t compressed_len = *result_len;
  if (brotli::BrotliCompressBuffer(params, len, data, &compressed_len, result)
      != 0) {
    *result_len = compressed_len;
    return true;
  }
  return false;
}
bool Woff2Compress(const uint8_t* data, const size_t len,
uint8_t* result, uint32_t* result_len) {
return Compress(data, len, result, result_len,
brotli::BrotliParams::MODE_FONT);
}
bool TextCompress(const uint8_t* data, const size_t len,
uint8_t* result, uint32_t* result_len) {
return Compress(data, len, result, result_len,
brotli::BrotliParams::MODE_TEXT);
}
// Returns the position of tag within the first 63 entries of kKnownTags, or
// 63 when the tag is not among them.
int KnownTableIndex(uint32_t tag) {
  int index = 0;
  while (index < 63 && kKnownTags[index] != tag) {
    ++index;
  }
  return index;
}
// Writes the directory entry of table at dst[*offset] and advances *offset:
// the flag byte (known-table index), the explicit 4-byte tag when the index
// is 63, the source length as UIntBase128 and, for transformed tables, the
// transform length as UIntBase128. No bounds checking is performed.
void StoreTableEntry(const Table& table, size_t* offset, uint8_t* dst) {
  const uint8_t flag_byte = KnownTableIndex(table.tag);
  dst[(*offset)++] = flag_byte;
  // The index here is treated as a set of flag bytes because
  // bits 6 and 7 of the byte are reserved for future use as flags.
  // 0x3f or 63 means an arbitrary table tag.
  const bool has_arbitrary_tag = (flag_byte & 0x3f) == 0x3f;
  if (has_arbitrary_tag) {
    StoreU32(table.tag, offset, dst);
  }
  StoreBase128(table.src_length, offset, dst);
  const bool is_transformed = (table.flags & kWoff2FlagsTransform) != 0;
  if (is_transformed) {
    StoreBase128(table.transform_length, offset, dst);
  }
}
// Returns the number of bytes StoreTableEntry() writes for table: 1 byte for
// the flag byte (plus 4 for an explicit tag when the index is 63), plus the
// UIntBase128 sizes of the source length and, for transformed tables, of the
// transform length.
size_t TableEntrySize(const Table& table) {
  const uint8_t flag_byte = KnownTableIndex(table.tag);
  size_t size = 1;
  if ((flag_byte & 0x3f) == 0x3f) {
    size += 4;  // explicit tag follows the flag byte
  }
  size += Base128Size(table.src_length);
  const bool is_transformed = (table.flags & kWoff2FlagsTransform) != 0;
  if (is_transformed) {
    size += Base128Size(table.transform_length);
  }
  return size;
}
// Computes the total size in bytes of the WOFF2 output: the fixed header, one
// directory entry per table, the collection directory (only for collections
// with more than one font), the compressed table data with the running total
// rounded up to a multiple of 4 after every table, and the extended metadata.
//
// index_by_offset maps a source table offset to the index of that table in
// `tables`. It is taken by const reference (no copy per call); offsets that
// are missing from the map count as index 0.
size_t ComputeWoff2Length(const FontCollection& font_collection,
                          const std::vector<Table>& tables,
                          const std::map<uint32_t, uint16_t>& index_by_offset,
                          size_t extended_metadata_length) {
  size_t size = kWoff2HeaderSize;

  for (const auto& table : tables) {
    size += TableEntrySize(table);
  }

  // for collections only, collection tables
  if (font_collection.fonts.size() > 1) {
    size += 4;  // UInt32 Version of TTC Header
    size += Size255UShort(font_collection.fonts.size());  // 255UInt16 numFonts

    size += 4 * font_collection.fonts.size();  // UInt32 flavor for each

    for (const auto& font : font_collection.fonts) {
      size += Size255UShort(font.tables.size());  // 255UInt16 numTables
      for (const auto& entry : font.tables) {
        const Font::Table& table = entry.second;
        // no collection entry for xform table
        if (table.tag & 0x80808080) continue;

        const auto found = index_by_offset.find(table.offset);
        const uint16_t table_index =
            (found != index_by_offset.end()) ? found->second : 0;
        size += Size255UShort(table_index);  // 255UInt16 index entry
      }
    }
  }

  // compressed data
  for (const auto& table : tables) {
    size += table.dst_length;
    size = Round4(size);
  }

  size += extended_metadata_length;
  return size;
}
size_t ComputeTTFLength(const std::vector<Table>& tables) {
size_t size = 12 + 16 * tables.size(); // sfnt header
for (const auto& table : tables) {
size += Round4(table.src_length);
}
return size;
}
// Returns the uncompressed size of a single font: sfnt header and offset
// table plus the 4-byte-rounded length of every table that is neither a
// transformed table nor reused.
size_t ComputeUncompressedLength(const Font& font) {
  // sfnt header + offset table
  size_t size = 12 + 16 * font.num_tables;
  for (const auto& entry : font.tables) {
    const Font::Table& table = entry.second;
    // xform tables don't stay; reused tables don't have to be paid twice.
    const bool is_transformed = (table.tag & 0x80808080) != 0;
    if (!is_transformed && !table.IsReused()) {
      size += Round4(table.length);
    }
  }
  return size;
}
// Returns the uncompressed size of a font collection. A collection with
// exactly one font is sized like a plain font; otherwise the collection
// header size is added to the sum of the sizes of all fonts.
size_t ComputeUncompressedLength(const FontCollection& font_collection) {
  const auto& fonts = font_collection.fonts;
  if (fonts.size() == 1) {
    return ComputeUncompressedLength(fonts[0]);
  }
  size_t total = CollectionHeaderSize(font_collection.header_version,
                                      fonts.size());
  for (const auto& font : fonts) {
    total += ComputeUncompressedLength(font);
  }
  return total;
}
// Returns the number of bytes of font that go into the compression stream:
// the lengths of all transformed tables plus the lengths of all tables that
// have no transformed version. Reused tables are not counted.
size_t ComputeTotalTransformLength(const Font& font) {
  size_t total = 0;
  for (const auto& entry : font.tables) {
    const Font::Table& table = entry.second;
    if (!table.IsReused()) {
      // Count transformed tables and non-transformed tables that do not have
      // transformed versions.
      const bool is_transformed = (table.tag & 0x80808080) != 0;
      if (is_transformed || font.FindTable(table.tag ^ 0x80808080) == NULL) {
        total += table.length;
      }
    }
  }
  return total;
}
} // namespace
size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length) {
return MaxWOFF2CompressedSize(data, length, "");
}
// Returns the output buffer size to reserve for converting a font of `length`
// bytes together with the given extended metadata. The font data itself is
// not inspected.
size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length,
                              const string& extended_metadata) {
  // Only the header is expected to grow (by 32 bytes) in the woff2 format;
  // the short table directory, the transformations and the compression should
  // make everything else smaller. 1024 bytes of headroom are added on top of
  // the input size to be on the safe side.
  const size_t headroom = 1024;
  return length + headroom + extended_metadata.length();
}
// Returns the size of the scratch buffer handed to the compressor for an
// input of original_size bytes: 1.2 times the input plus 10240 bytes, with
// the fractional part dropped.
uint32_t CompressedBufferSize(uint32_t original_size) {
  return static_cast<uint32_t>(1.2 * original_size + 10240);
}
bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
uint8_t *result, size_t *result_length) {
return ConvertTTFToWOFF2(data, length, result, result_length, "");
}
// Applies the glyf/loca transformation to every font of the collection, in
// order. Stops at the first font whose transformation fails, reports it on
// stderr and returns false; returns true when all fonts were transformed.
bool TransformFontCollection(FontCollection* font_collection) {
  auto& fonts = font_collection->fonts;
  for (auto it = fonts.begin(); it != fonts.end(); ++it) {
    if (TransformGlyfAndLocaTables(&*it)) {
      continue;
    }
    fprintf(stderr, "Font transformation failed.\n");
    return false;
  }
  return true;
}
bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
uint8_t *result, size_t *result_length,
const string& extended_metadata) {
FontCollection font_collection;
if (!ReadFontCollection(data, length, &font_collection)) {
fprintf(stderr, "Parsing of the input font failed.\n");
return false;
}
if (!NormalizeFontCollection(&font_collection)) {
return false;
}
if (!TransformFontCollection(&font_collection)) {
return false;
}
// Although the compressed size of each table in the final woff2 file won't
// be larger than its transform_length, we have to allocate a large enough
// buffer for the compressor, since the compressor can potentially increase
// the size. If the compressor overflows this, it should return false and
// then this function will also return false.
size_t total_transform_length = 0;
for (const auto& font : font_collection.fonts) {
total_transform_length += ComputeTotalTransformLength(font);
}
size_t compression_buffer_size = CompressedBufferSize(total_transform_length);
std::vector<uint8_t> compression_buf(compression_buffer_size);
uint32_t total_compressed_length = compression_buffer_size;
// Collect all transformed data into one place.
std::vector<uint8_t> transform_buf(total_transform_length);
size_t transform_offset = 0;
for (const auto& font : font_collection.fonts) {
for (const auto& i : font.tables) {
const Font::Table* table = font.FindTable(i.second.tag ^ 0x80808080);
if (i.second.IsReused()) continue;
if (i.second.tag & 0x80808080) continue;
if (table == NULL) table = &i.second;
StoreBytes(table->data, table->length,
&transform_offset, &transform_buf[0]);
}
}
// Compress all transformed data in one stream.
if (!Woff2Compress(transform_buf.data(), total_transform_length,
&compression_buf[0],
&total_compressed_length)) {
fprintf(stderr, "Compression of combined table failed.\n");
return false;
}
// Compress the extended metadata
// TODO(user): how does this apply to collections
uint32_t compressed_metadata_buf_length =
CompressedBufferSize(extended_metadata.length());
std::vector<uint8_t> compressed_metadata_buf(compressed_metadata_buf_length);
if (extended_metadata.length() > 0) {
if (!TextCompress((const uint8_t*)extended_metadata.data(),
extended_metadata.length(),
compressed_metadata_buf.data(),
&compressed_metadata_buf_length)) {
fprintf(stderr, "Compression of extended metadata failed.\n");
return false;
}
} else {
compressed_metadata_buf_length = 0;
}
std::vector<Table> tables;
std::map<uint32_t, uint16_t> index_by_offset;
for (const auto& font : font_collection.fonts) {
for (const auto tag : font.OutputOrderedTags()) {
const Font::Table& src_table = font.tables.at(tag);
if (src_table.IsReused()) {
continue;
}
if (index_by_offset.find(src_table.offset) == index_by_offset.end()) {
index_by_offset[src_table.offset] = tables.size();
} else {
return false;
}
Table table;
table.tag = src_table.tag;
table.flags = 0;
table.src_length = src_table.length;
table.transform_length = src_table.length;
const uint8_t* transformed_data = src_table.data;
const Font::Table* transformed_table =
font.FindTable(src_table.tag ^ 0x80808080);
if (transformed_table != NULL) {
table.flags |= kWoff2FlagsTransform;
table.transform_length = transformed_table->length;
transformed_data = transformed_table->data;
}
if (tables.empty()) {
table.dst_length = total_compressed_length;
table.dst_data = &compression_buf[0];
} else {
table.dst_length = 0;
table.dst_data = NULL;
table.flags |= kWoff2FlagsContinueStream;
}
tables.push_back(table);
}
}
size_t woff2_length = ComputeWoff2Length(font_collection, tables,
index_by_offset, compressed_metadata_buf_length);
if (woff2_length > *result_length) {
fprintf(stderr, "Result allocation was too small (%zd vs %zd bytes).\n",
*result_length, woff2_length);
return false;
}
*result_length = woff2_length;
const Font& first_font = font_collection.fonts[0];
size_t offset = 0;
// start of woff2 header (http://www.w3.org/TR/WOFF2/#woff20Header)
StoreU32(kWoff2Signature, &offset, result);
if (font_collection.fonts.size() == 1) {
StoreU32(first_font.flavor, &offset, result);
} else {
StoreU32(kTtcFontFlavor, &offset, result);
}
StoreU32(woff2_length, &offset, result);
Store16(tables.size(), &offset, result);
Store16(0, &offset, result); // reserved
// totalSfntSize
StoreU32(ComputeUncompressedLength(font_collection), &offset, result);
StoreU32(total_compressed_length, &offset, result); // totalCompressedSize
// TODO(user): is always taking this from the first tables head OK?
// font revision
StoreBytes(first_font.FindTable(kHeadTableTag)->data + 4, 4, &offset, result);
if (compressed_metadata_buf_length > 0) {
StoreU32(woff2_length - compressed_metadata_buf_length,
&offset, result); // metaOffset
StoreU32(compressed_metadata_buf_length, &offset, result); // metaLength
StoreU32(extended_metadata.length(), &offset, result); // metaOrigLength
} else {
StoreU32(0, &offset, result); // metaOffset
StoreU32(0, &offset, result); // metaLength
StoreU32(0, &offset, result); // metaOrigLength
}
StoreU32(0, &offset, result); // privOffset
StoreU32(0, &offset, result); // privLength
// end of woff2 header
// table directory (http://www.w3.org/TR/WOFF2/#table_dir_format)
for (const auto& table : tables) {
StoreTableEntry(table, &offset, result);
}
// for collections only, collection table directory
if (font_collection.fonts.size() > 1) {
StoreU32(font_collection.header_version, &offset, result);
Store255UShort(font_collection.fonts.size(), &offset, result);
for (const Font& font : font_collection.fonts) {
uint16_t num_tables = 0;
for (const auto& entry : font.tables) {
const Font::Table& table = entry.second;
if (table.tag & 0x80808080) continue; // don't write xform tables
num_tables++;
}
Store255UShort(num_tables, &offset, result);
StoreU32(font.flavor, &offset, result);
for (const auto& entry : font.tables) {
const Font::Table& table = entry.second;
if (table.tag & 0x80808080) continue; // don't write xform tables
// for reused tables, only the original has an updated offset
uint32_t table_offset =
table.IsReused() ? table.reuse_of->offset : table.offset;
uint32_t table_length =
table.IsReused() ? table.reuse_of->length : table.length;
if (index_by_offset.find(table_offset) == index_by_offset.end()) {
fprintf(stderr, "Missing table index for offset 0x%08x\n",
table_offset);
return false;
}
uint16_t index = index_by_offset[table_offset];
Store255UShort(index, &offset, result);
}
}
}
// compressed data format (http://www.w3.org/TR/WOFF2/#table_format)
for (const auto& table : tables) {
StoreBytes(table.dst_data, table.dst_length, &offset, result);
offset = Round4(offset);
}
StoreBytes(compressed_metadata_buf.data(), compressed_metadata_buf_length,
&offset, result);
if (*result_length != offset) {
fprintf(stderr, "Mismatch between computed and actual length "
"(%zd vs %zd)\n", *result_length, offset);
return false;
}
return true;
}
} // namespace woff2

View File

@ -0,0 +1,46 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Library for converting TTF format font files to their WOFF2 versions.
#ifndef WOFF2_WOFF2_ENC_H_
#define WOFF2_WOFF2_ENC_H_
#include <stddef.h>
#include <inttypes.h>
#include <string>
using std::string;
// Public encoder entry points: sizing helpers and the TTF -> WOFF2 converters.
namespace woff2 {
// Returns an upper bound on the size of the compressed file.
// `data` points to the input font bytes and `length` is their count. Callers
// use the returned value to size the output buffer handed to
// ConvertTTFToWOFF2().
size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length);
// Same as above for a conversion that also carries `extended_metadata`.
// NOTE(review): presumably the bound includes room for the metadata block;
// the definition is not part of this header - confirm against the .cc file.
size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length,
const string& extended_metadata);
// Compresses the font into the target buffer. *result_length should be at least
// the value returned by MaxWOFF2CompressedSize(), upon return, it is set to the
// actual compressed size. Returns true on successful compression.
// `data`/`length` describe the input font; `result` is the caller-owned output
// buffer.
// NOTE(review): this overload takes no metadata; presumably it behaves like
// the overload below with an empty string - confirm against the definition.
bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
uint8_t *result, size_t *result_length);
// As above, additionally embedding `extended_metadata`: when the string is
// non-empty it is compressed and stored as the file's metadata block; when it
// is empty the metadata fields of the header are written as zero. Returns
// false if *result_length is smaller than the size of the generated file.
bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
uint8_t *result, size_t *result_length,
const string& extended_metadata);
} // namespace woff2
#endif // WOFF2_WOFF2_ENC_H_