// blob: 6b52dc92d45ef8e9eb32ff70b9c838fc5b718def [file] [log] [blame]
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./avif.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include <sstream>
#include <string>
#include "./av1_obu.h"
#include "./bit_packing.h"
#include "./container.h"
namespace container {
//------------------------------------------------------------------------------
namespace {
// ISO/IEC 23008-12 HEIC "irot" + "imir" boxes to Container::Orientation values.
// MIAF Section 7.3.6.7 says to apply rotation, then mirror.
// The table is indexed as kOrientationMapping[rotation][mirroring]:
//  - 'rotation' is the row, in the order listed below: 0, 90, 180 and 270
//    degrees counter-clockwise.
//  - 'mirroring' is the column: 0 when there is no mirroring, otherwise the
//    value coded in the "imir" box plus one.
// A few orientations can be reached through two different combinations, so
// they appear twice in the table.
constexpr Orientation kOrientationMapping[4][3] = {
// No rotation.
{Orientation::kOriginal, // No mirroring.
Orientation::kFlipTopToBottom, // Mirror top to bottom.
Orientation::kFlipLeftToRight}, // Mirror left to right.
// 90 degrees counter-clockwise.
{Orientation::k270Clockwise, // No mirroring.
Orientation::kFlipBotLeftToTopRgt, // Mirror top to bottom.
Orientation::kFlipTopLeftToBotRgt}, // Mirror left to right.
// 180 degrees.
{Orientation::k180, // No mirroring.
Orientation::kFlipLeftToRight, // Mirror top to bottom.
Orientation::kFlipTopToBottom}, // Mirror left to right.
// 270 degrees counter-clockwise.
{Orientation::k90Clockwise, // No mirroring.
Orientation::kFlipTopLeftToBotRgt, // Mirror top to bottom.
Orientation::kFlipBotLeftToTopRgt}, // Mirror left to right.
};
// Container::Orientation values to HEIC "irot" + "imir" boxes.
// On return, 'irot' is the row and 'imir' the column of the first entry of
// kOrientationMapping (searched row by row) that equals 'orientation'.
// An 'orientation' absent from the table is a programming error: it asserts in
// debug builds. When asserts are compiled out, the outputs fall back to
// "no rotation, no mirroring" instead of being left at the out-of-range values
// 4 and 3.
void GetIrotImir(Orientation orientation, uint8_t& irot, uint8_t& imir) {
  for (uint8_t rotation = 0; rotation < 4; ++rotation) {
    for (uint8_t mirroring = 0; mirroring < 3; ++mirroring) {
      if (kOrientationMapping[rotation][mirroring] == orientation) {
        irot = rotation;
        imir = mirroring;
        return;
      }
    }
  }
  assert(false);
  irot = 0;
  imir = 0;
}
// Big-endian unsigned integer coding of 'value'.
// 'output' must be at least 'num_bytes'-long.
// If 'num_bytes' is less than 4, only the least significant 'num_bytes' bytes
// of 'value' are written. If it is more than 4, the extra leading bytes are
// set to zero.
void WriteBigEndian(uint32_t value, uint8_t output[], uint32_t num_bytes = 1) {
  for (uint32_t i = 0; i < num_bytes; ++i) {
    // Index of the byte to emit, counted from the least significant one.
    const uint32_t byte_index = num_bytes - i - 1;
    // Shifting a 32-bit integer by 32 bits or more is undefined behavior, so
    // bytes beyond the width of 'value' are explicitly zero.
    output[i] = (byte_index < sizeof(value))
                    ? static_cast<uint8_t>((value >> (byte_index * 8)) & 0xFFu)
                    : static_cast<uint8_t>(0);
  }
}
// Decodes the big-endian unsigned integer stored in the first 'num_bytes'
// bytes of 'input', which must be at least that long.
// Only the last four bytes read contribute to the 32-bit result.
uint32_t ReadBigEndian(const uint8_t input[], uint32_t num_bytes = 1) {
  uint32_t result = 0;
  const uint8_t* const end = input + num_bytes;
  for (const uint8_t* byte = input; byte != end; ++byte) {
    result <<= 8;
    result |= *byte;
  }
  return result;
}
// Returns true if the four bytes of 'fourcc' are equal to the first four
// characters of 'str'. 'str' must be at least four characters long.
bool EqualFourCC(const uint8_t fourcc[4], const char str[]) {
  for (int i = 0; i < 4; ++i) {
    if (!(fourcc[i] == str[i])) return false;
  }
  return true;
}
//------------------------------------------------------------------------------
// ISOBMFF box writer that uses the dtor to finalize the encoding.
// The constructor appends a box header to 'output': a 32-bit size placeholder
// followed by the box tag. Each Write*() call then appends payload bytes to
// 'output'. The destructor overwrites the placeholder with the number of bytes
// appended to 'output' since construction, header included, so a BoxWriter
// created while another one is alive on the same 'output' is a child box.
// 'output' is held by reference and must outlive the BoxWriter.
class BoxWriter {
 public:
  // 'fourcc' must be a null-terminated string of four characters.
  BoxWriter(const char fourcc[4], Data& output)
      : output_(output), box_offset_(output.size()) {
    Write<uint32_t>(8);  // size placeholder, replaced in dtor
    WriteStr(fourcc);    // box tag
  }
  // Not copyable: a copy would patch the same size field a second time when
  // destroyed.
  BoxWriter(const BoxWriter&) = delete;
  BoxWriter& operator=(const BoxWriter&) = delete;
  ~BoxWriter() {
    const size_t box_size = output_.size() - box_offset_;
    // Only the 32-bit size field is written, bigger boxes cannot be coded.
    assert(box_size <= 0xFFFFFFFFu);
    WriteBigEndian(static_cast<uint32_t>(box_size),
                   output_.data() + box_offset_, sizeof(uint32_t));
  }
  // Appends 'value'. CodedType is used only for its number of bits and must be
  // specified explicitly. The most significant bits of 'value' that do not fit
  // in CodedType are dropped.
  template <typename CodedType>
  void Write(uint32_t value) {
    static_assert(sizeof(CodedType) <= sizeof(uint32_t),
                  "CodedType cannot be wider than the 32-bit 'value'");
    output_.resize(output_.size() + sizeof(CodedType));
    WriteBigEndian(value, output_.data() + (output_.size() - sizeof(CodedType)),
                   sizeof(CodedType));
  }
  // Appends 'data_size' bytes of 'data'.
  void WriteBytes(const uint8_t data[], size_t data_size) {
    output_.insert(output_.end(), data, data + data_size);
  }
  // Appends 'str'. 'append_0' includes the null-terminating character \0.
  void WriteStr(const char str[], bool append_0 = false) {
    const size_t str_size = std::strlen(str) + (append_0 ? 1 : 0);
    output_.reserve(output_.size() + str_size);
    for (size_t i = 0; i < str_size; ++i) {
      output_.emplace_back(static_cast<uint8_t>(str[i]));
    }
  }
  // Appends the 32 bits of additional header needed by some ISOBMFF boxes:
  // 8 bits of 'version' followed by the 24 least significant bits of 'flags'.
  void WriteFullBoxHeader(uint8_t version = 0, uint32_t flags = 0) {
    // 'version' is widened before the shift, otherwise it is promoted to a
    // signed int and versions >= 128 are shifted into the sign bit.
    Write<uint32_t>((static_cast<uint32_t>(version) << 24) |
                    (flags & 0xFFFFFFu));
  }

 private:
  Data& output_;
  const size_t box_offset_;  // Position of the 4 bytes for box size.
};
} // namespace
//------------------------------------------------------------------------------
bool WrapAvif(const Image& image, const uint8_t codec_bytes[],
size_t num_codec_bytes, Data& data) {
// Decode the necessary information from the Sequence Header. AVIF requires
// some of these fields to match between the MIAF container and AV1-OBU.
SequenceHeaderObu header;
if (!UnwrapObuSequenceHeader(codec_bytes, num_codec_bytes, header)) {
return false;
}
// Position in the bitstream of the encoded value of the offset of the codec
// payload within the bitstream.
size_t offset_of_codec_bytes_offset_within_file;
{ // ISO/IEC 14496-12 - 4.3.2 File Type Box Syntax
BoxWriter ftyp("ftyp", data); // FileTypeBox
ftyp.WriteStr("avif"); // major_brand
ftyp.Write<uint32_t>(0); // minor_version
ftyp.WriteStr("avifmif1miaf"); // compatible_brands
// Valid AVIF files shall specify the "avif", "miaf" and "mif1" brands.
// https://aomediacodec.github.io/av1-avif/#file-constraints requires
// "ISO/IEC 23000-22 - 7.2.1 Box-level requirements on image items" needing
// "ISO/IEC 23008-12 - 10.2.1.1 "mif1" structural brand file requirements".
// Baseline Profile "MA1B" is only for Main Profile and Level at most 5.1.
// https://aomediacodec.github.io/av1-avif/#baseline-profile
// Advanced Profile "MA1A" is only for High Profile and Level at most 6.0.
// https://aomediacodec.github.io/av1-avif/#advanced-profile
// See "AV1 Bitstream & Decoding Process Specification - Annex A".
if (header.seq_profile == 0 && header.seq_level_idx_0 <= 13) {
ftyp.WriteStr("MA1B"); // compatible_brands
} else if (header.seq_profile == 1 && header.seq_level_idx_0 <= 16) {
ftyp.WriteStr("MA1A"); // compatible_brands
}
}
{ // ISO/IEC 14496-12 - 8.11.1.2 Meta Box Syntax
BoxWriter meta("meta", data); // MetaBox, mandatory because "mif1"
meta.WriteFullBoxHeader();
{ // ISO/IEC 14496-12 - 8.4.3.2 Handler Reference Box Syntax
BoxWriter hdlr("hdlr", data); // HandlerBox, mandatory because "mif1"
hdlr.WriteFullBoxHeader();
hdlr.Write<uint32_t>(0); // pre_defined
hdlr.WriteStr("pict"); // handler_type
hdlr.Write<uint32_t>(0); // reserved
hdlr.Write<uint32_t>(0); // reserved
hdlr.Write<uint32_t>(0); // reserved
hdlr.WriteStr("", /*append_0=*/true); // name of the library used for
// encoding (unknown)
}
{ // ISO/IEC 14496-12 - 8.11.4.2 Primary Item Box Syntax
// Note: According to "ISO/IEC 14496-12-8.11.1.2", the "pitm" box should
// be just after "hdlr", but not in "ISO/IEC 23008-12-10.2.1.1".
BoxWriter pitm("pitm", data); // PrimaryItemBox, mandatory because "mif1"
pitm.WriteFullBoxHeader();
pitm.Write<uint16_t>(1); // item_ID
}
{ // ISO/IEC 14496-12 - 8.11.3.2 Item Location Box Syntax
BoxWriter iloc("iloc", data); // ItemLocationBox, mandatory bc "mif1"
iloc.WriteFullBoxHeader();
iloc.Write<uint8_t>((0x0 << 4) | 0x4); // offset_size, length_size
iloc.Write<uint8_t>((0x4 << 4) | 0x0); // base_offset_size, reserved
iloc.Write<uint16_t>(1); // item_count
iloc.Write<uint16_t>(1); // item_ID
iloc.Write<uint16_t>(0); // data_reference_index
offset_of_codec_bytes_offset_within_file = data.size(); // For later.
iloc.Write<uint32_t>(0); // base_offset (unknown yet)
iloc.Write<uint16_t>(1); // extent_count
iloc.Write<uint32_t>(num_codec_bytes); // extent_length
}
{ // ISO/IEC 14496-12 - 8.11.6.2 Item Information Box Syntax
BoxWriter iinf("iinf", data); // ItemInfoBox, mandatory because "mif1"
iinf.WriteFullBoxHeader();
const uint32_t entry_count = 1;
iinf.Write<uint16_t>(entry_count);
for (uint32_t i = 0; i < entry_count; ++i) {
BoxWriter infe("infe", data); // ItemInfoEntry, mandatory bc "mif1"
infe.WriteFullBoxHeader(/*version=*/2);
infe.Write<uint16_t>(i + 1); // item_ID
infe.Write<uint16_t>(0); // item_protection_index
infe.WriteStr("av01"); // item_type
infe.WriteStr("Image", /*append_0=*/true); // item_name
}
}
{ // ISO/IEC 23008-12 - 9.3.2 Item Properties Box Syntax
BoxWriter iprp("iprp", data); // ItemPropertiesBox, mandatory bc "mif1"
uint32_t num_properties = 0; // Needed by "ipma".
{
BoxWriter ipco("ipco", data); // ItemPropertyContainerBox
{ // "av1C" is mandatory for any "infe" item of type "av01".
// See https://aomediacodec.github.io/av1-avif/ 2.2.1
// and https://aomediacodec.github.io/av1-isobmff/ 2.3.3
BoxWriter ispe("av1C", data); // AV1CodecConfigurationBox
// The values in "av1C" should be equal to the ones defined in the OBU
// Sequence Header.
BitPacker ispe_fields;
ispe_fields.EncodeUInt(1, 1); // marker
ispe_fields.EncodeUInt(1, 7); // version
ispe_fields.EncodeUInt(header.seq_profile, 3);
ispe_fields.EncodeUInt(header.seq_level_idx_0, 5);
ispe_fields.EncodeBool(false); // seq_tier_0
ispe_fields.EncodeBool(header.high_bitdepth);
ispe_fields.EncodeBool(header.twelve_bit);
ispe_fields.EncodeBool(header.mono_chrome);
ispe_fields.EncodeUInt(header.subsampling_x, 1);
ispe_fields.EncodeUInt(header.subsampling_y, 1);
ispe_fields.EncodeUInt(header.chroma_sample_position, 2);
ispe_fields.EncodeUInt(0, 3); // reserved
ispe_fields.EncodeUInt(0, 1); // initial_presentation_delay_present
ispe_fields.EncodeUInt(0, 4); // reserved
if (ispe_fields.GetNumBits() != 32) return false;
ispe.WriteBytes(ispe_fields.GetBytes(), ispe_fields.GetNumBits() / 8);
// "Sequence Header OBUs should not be present in the
// AV1CodecConfigurationBox" so leave configOBUs empty.
++num_properties;
}
{
BoxWriter ispe("ispe", data); // ImageSpatialExtentsProperty
ispe.WriteFullBoxHeader();
ispe.Write<uint32_t>(image.width);
ispe.Write<uint32_t>(image.height);
++num_properties;
}
uint8_t rotation, mirror;
GetIrotImir(image.orientation, rotation, mirror);
if (rotation != 0) {
BoxWriter irot("irot", data); // ImageRotation
irot.Write<uint8_t>(rotation);
++num_properties;
}
if (mirror != 0) {
BoxWriter imir("imir", data); // ImageMirror
imir.Write<uint8_t>(mirror - 1);
++num_properties;
}
{
BoxWriter pixi("pixi", data); // PixelInformationProperty
pixi.WriteFullBoxHeader();
pixi.Write<uint8_t>(3); // Number of channels.
const uint8_t bit_depth =
header.high_bitdepth ? header.twelve_bit ? 12 : 10 : 8;
pixi.Write<uint8_t>(bit_depth);
pixi.Write<uint8_t>(bit_depth);
pixi.Write<uint8_t>(bit_depth);
++num_properties;
}
}
{
BoxWriter ipma("ipma", data); // ItemPropertyAssociation
ipma.WriteFullBoxHeader();
ipma.Write<uint32_t>(1); // entry_count
ipma.Write<uint16_t>(1); // item_ID
ipma.Write<uint8_t>(num_properties); // association_count
for (uint32_t i = 0; i < num_properties; ++i) {
ipma.Write<uint8_t>(0x80 | (i + 1)); // 1b essential + 7b index
}
}
}
}
{
BoxWriter mdat("mdat", data); // MediaDataBox
// Write in the "iloc" box where the 'codec_bytes' begin in the file.
WriteBigEndian(data.size(), &data[offset_of_codec_bytes_offset_within_file],
sizeof(uint32_t));
// Append the 'codec_bytes'.
data.insert(data.end(), codec_bytes, codec_bytes + num_codec_bytes);
}
return true;
}
//------------------------------------------------------------------------------
namespace {
// Minimal AVIF parsing to extract a few images features.
// Walks the consecutive boxes stored in the 'data_size' bytes at 'data' and
// recurses into the content of "meta", "iprp" and "ipco" boxes. Outputs are
// only modified when the matching box is encountered:
//  - 'saw_ftyp' is set when an "ftyp" box with the "avif" major brand is seen.
//  - 'image' receives the dimensions ("ispe") and the format ("pixi").
//  - 'angle' and 'mirror' receive the row and column of kOrientationMapping
//    coded by the "irot" and "imir" boxes.
//  - 'codec_bytes' and 'num_codec_bytes' describe the content of the last
//    "mdat" box seen.
// 'depth' is the current nesting level and is only meant to be set by the
// recursive calls.
// Returns false in case of error.
bool ParseBoxes(const uint8_t data[], size_t data_size, Image& image,
                bool& saw_ftyp, const uint8_t*& codec_bytes,
                size_t& num_codec_bytes, uint32_t& angle, uint32_t& mirror,
                uint32_t depth = 0) {
  // The input is untrusted: without a limit, a file made of "meta" boxes
  // nested into each other would recurse once per 12 bytes of input. Files
  // written by WrapAvif() only nest "meta" > "iprp" > "ipco".
  constexpr uint32_t kMaxDepth = 16;
  if (depth > kMaxDepth) return false;

  size_t position = 0;
  // 'position' never exceeds 'data_size', so the subtractions below cannot
  // wrap around, contrary to "position + size" with a 32-bit size_t.
  while (data_size - position >= 4) {
    const size_t size = ReadBigEndian(data + position, sizeof(uint32_t));
    if (size < 8 || size > data_size - position) return false;
    const uint8_t* const fourcc = data + position + 4;

    const bool has_fullbox_header = EqualFourCC(fourcc, "meta") ||
                                    EqualFourCC(fourcc, "ispe") ||
                                    EqualFourCC(fourcc, "pixi");
    const size_t offset = (has_fullbox_header ? 12 : 8);
    if (size < offset) return false;
    const uint8_t* content = data + position + offset;
    const size_t content_size = size - offset;

    if (EqualFourCC(fourcc, "ftyp")) {
      if (size < 12) return false;  // No room for the major brand.
      if (!EqualFourCC(content, "avif")) return false;
      saw_ftyp = true;
    } else if (EqualFourCC(fourcc, "meta") || EqualFourCC(fourcc, "iprp") ||
               EqualFourCC(fourcc, "ipco")) {
      if (!ParseBoxes(content, content_size, image, saw_ftyp, codec_bytes,
                      num_codec_bytes, angle, mirror, depth + 1)) {
        return false;
      }
    } else if (EqualFourCC(fourcc, "irot")) {
      if (content_size < 1) return false;
      angle = ReadBigEndian(content);
      if (angle > 3) return false;  // Keep it a valid row index.
    } else if (EqualFourCC(fourcc, "imir")) {
      if (content_size < 1) return false;
      // Column 0 of kOrientationMapping means "no mirroring".
      mirror = 1 + ReadBigEndian(content);
      if (mirror > 2) return false;  // Keep it a valid column index.
    } else if (EqualFourCC(fourcc, "ispe")) {
      if (content_size < 4 + 4) return false;
      image.width = ReadBigEndian(content + 0, 4);
      image.height = ReadBigEndian(content + 4, 4);
    } else if (EqualFourCC(fourcc, "pixi")) {
      if (content_size < 1) return false;
      const uint8_t num_channels = ReadBigEndian(content + 0);
      if (content_size < static_cast<size_t>(1) + num_channels) return false;
      for (uint32_t i = 0; i < num_channels; ++i) {
        const uint8_t bit_depth = ReadBigEndian(content + 1 + i);
        // All channels must have the same bit depth as the first one.
        if (bit_depth == ReadBigEndian(content + 1)) {
          // Good enough mapping for now.
          image.format = (bit_depth == 8)    ? Format::kARGB8
                         : (bit_depth == 10) ? Format::kAYUV10
                         : (bit_depth == 12) ? Format::kAYUV12
                                             : Format::kUnknown;
        } else {
          return false;
        }
      }
    } else if (EqualFourCC(fourcc, "stss")) {
      // TODO(yguyon): Read frame durations
    } else if (EqualFourCC(fourcc, "mdat")) {
      codec_bytes = content;
      num_codec_bytes = content_size;
    } else {
      // Skip any unknown box.
    }
    position += size;
  }
  return (position == data_size);  // No trailing garbage allowed.
}
} // namespace
// Parses the 'data_size' bytes of AVIF file at 'data'. On success, fills
// 'image' with the features found in the boxes, and sets
// 'offset_till_codec_bytes' and 'num_codec_bytes' to the position within 'data'
// and the size of the "mdat" box content.
// Returns false if the boxes cannot be parsed, if there is no valid "ftyp" box
// or if there is no codec payload.
bool UnwrapAvif(const uint8_t data[], size_t data_size, Image& image,
                size_t& offset_till_codec_bytes, size_t& num_codec_bytes) {
  // Reset the outputs before parsing.
  image = Image();
  num_codec_bytes = 0;
  offset_till_codec_bytes = 0;

  bool found_ftyp = false;
  const uint8_t* payload = nullptr;
  uint32_t rotation_index = 0;
  uint32_t mirroring_index = 0;
  const bool parsed =
      ParseBoxes(data, data_size, image, found_ftyp, payload, num_codec_bytes,
                 rotation_index, mirroring_index);
  if (!parsed) return false;

  const bool is_complete = found_ftyp && (num_codec_bytes != 0);
  if (!is_complete) return false;

  offset_till_codec_bytes = payload - data;
  image.orientation = kOrientationMapping[rotation_index][mirroring_index];
  image.has_alpha = false;  // TODO(yguyon): Handle alpha
  return true;
}
//------------------------------------------------------------------------------
namespace {
// Parses the 'data' and returns a string containing all ISOBMFF box.
// Each box found in the 'data_size' bytes at 'data' is described on one line
// prefixed by 'indent' spaces, with its tag, size and offset within 'data'.
// The content of "meta", "iprp" and "ipco" boxes is described recursively with
// one more space of indentation. For any other box, up to 72 bytes of content
// are printed on the next line, non-printable ones being replaced by '?'.
// The description stops at the first box whose size is invalid.
std::string BmffBoxesToStr(const uint8_t data[], size_t data_size,
                           uint32_t indent) {
  std::stringstream ss;
  size_t position = 0;
  // 'position' never exceeds 'data_size', so the subtractions below cannot
  // wrap around, contrary to "position + box_size" with a 32-bit size_t.
  while (data_size - position >= 4) {
    for (uint32_t i = 0; i < indent; ++i) ss << " ";
    const size_t box_size = ReadBigEndian(data + position, sizeof(uint32_t));
    if (box_size < 8 || box_size > data_size - position) {
      ss << "Bad box size (" << box_size << " bytes)" << '\n';
      break;
    }
    const uint8_t* const fourcc = data + position + 4;
    const char* const box = reinterpret_cast<const char*>(fourcc);
    ss << "Box " << box[0] << box[1] << box[2] << box[3] << " (" << box_size
       << " bytes at offset " << position << ")" << '\n';

    if (box_size > 8) {
      if (EqualFourCC(fourcc, "meta") && box_size > 12) {
        for (uint32_t i = 0; i < indent + 1; ++i) ss << " ";
        // The three bytes of flags are printed one after the other.
        ss << "Version " << static_cast<uint32_t>(data[position + 8])
           << ", flags " << static_cast<uint32_t>(data[position + 9])
           << static_cast<uint32_t>(data[position + 10])
           << static_cast<uint32_t>(data[position + 11]) << '\n';
        ss << BmffBoxesToStr(data + position + 12, box_size - 12, indent + 1);
      } else if (EqualFourCC(fourcc, "iprp") || EqualFourCC(fourcc, "ipco")) {
        ss << BmffBoxesToStr(data + position + 8, box_size - 8, indent + 1);
      } else {
        for (uint32_t i = 0; i <= indent; ++i) ss << " ";
        // Display the first few dozens of characters.
        const size_t content_size = box_size - 8;
        const size_t num_displayed =
            std::min(content_size, static_cast<size_t>(72));
        const char* const content =
            reinterpret_cast<const char*>(data + position + 8);
        for (size_t i = 0; i < num_displayed; ++i) {
          const char c = content[i];
          // Display a printable character or an interrogation mark.
          ss << ((c >= ' ' && c <= '~') ? c : '?');
        }
        if (content_size > 72) ss << "...";  // Indicate missing characters.
        ss << '\n';
      }
    }
    position += box_size;
  }
  return ss.str();
}
} // namespace
// Returns a human-readable description of the ISOBMFF boxes found in the
// 'data_size' bytes at 'data', with top-level boxes indented by one space.
std::string BmffBoxesToStr(const uint8_t data[], size_t data_size) {
  constexpr uint32_t kTopLevelIndent = 1;
  return BmffBoxesToStr(data, data_size, kTopLevelIndent);
}
//------------------------------------------------------------------------------
} // namespace container