| // Copyright 2014 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "media/formats/mp2t/es_parser_h264.h" |
| |
| #include <limits> |
| |
| #include "base/containers/adapters.h" |
| #include "base/logging.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "media/base/decrypt_config.h" |
| #include "media/base/encryption_pattern.h" |
| #include "media/base/media_util.h" |
| #include "media/base/stream_parser_buffer.h" |
| #include "media/base/timestamp_constants.h" |
| #include "media/base/video_frame.h" |
| #include "media/formats/common/offset_byte_queue.h" |
| #include "media/formats/mp2t/mp2t_common.h" |
| #include "media/video/h264_parser.h" |
| #include "third_party/abseil-cpp/absl/types/optional.h" |
| #include "ui/gfx/geometry/rect.h" |
| #include "ui/gfx/geometry/size.h" |
| |
| namespace media { |
| namespace mp2t { |
| |
| namespace { |
| |
// Layout parameters for HLS SampleAES protected H.264 data.

// Only blocks strictly longer than this many bytes are treated as protected.
constexpr int kSampleAESMaxUnprotectedNALULength = 48;

// Number of bytes at the start of each protected block that remain clear.
constexpr int kSampleAESClearLeaderSize = 32;

// The constant (encrypt, skip) pattern, counted in 16-byte AES blocks.
constexpr int kSampleAESEncryptBlocks = 1;
constexpr int kSampleAESSkipBlocks = 9;

// Size in bytes of one complete encrypt+skip pattern unit.
constexpr int kSampleAESPatternUnit =
    16 * (kSampleAESEncryptBlocks + kSampleAESSkipBlocks);
| |
| // Attempts to find the first or only EP3B (emulation prevention 3 byte) in |
| // the part of the |buffer| between |start_pos| and |end_pos|. Returns the |
| // position of the EP3B, or 0 if there are none. |
| // Note: the EP3B always follows two zero bytes, so the value 0 can never be a |
| // valid position. |
| int FindEP3B(const uint8_t* buffer, int start_pos, int end_pos) { |
| const uint8_t* data = buffer + start_pos; |
| int data_size = end_pos - start_pos; |
| DCHECK_GE(data_size, 0); |
| int bytes_left = data_size; |
| |
| while (bytes_left >= 4) { |
| if (data[0] == 0x00 && data[1] == 0x00 && data[2] == 0x03 && |
| data[3] <= 0x03) { |
| return (data - buffer) + 2; |
| } |
| ++data; |
| --bytes_left; |
| } |
| return 0; |
| } |
| |
// Deletes the byte at index |pos| of |buffer| by sliding the bytes in
// [pos + 1, end_pos) down one position so they occupy [pos, end_pos - 1).
// The byte at index end_pos - 1 is left untouched; the caller is expected to
// shrink its notion of the buffer length by one.
void RemoveByte(uint8_t* buffer, int pos, int end_pos) {
  uint8_t* const gap = buffer + pos;
  const int tail_length = end_pos - pos - 1;
  // The source and destination overlap, hence memmove rather than memcpy.
  memmove(gap, gap + 1, tail_length);
}
| |
| // Given an Access Unit pointed to by |au| of size |au_size|, removes emulation |
| // prevention 3 bytes (EP3B) from within the |protected_blocks|. Also computes |
| // the |subsamples| vector describing the resulting AU. |
| // Returns the allocated buffer holding the adjusted copy, or NULL if no size |
| // adjustment was necessary. |
| std::unique_ptr<uint8_t[]> AdjustAUForSampleAES( |
| const uint8_t* au, |
| int* au_size, |
| const Ranges<int>& protected_blocks, |
| std::vector<SubsampleEntry>* subsamples) { |
| DCHECK(subsamples); |
| DCHECK(au_size); |
| std::unique_ptr<uint8_t[]> result; |
| int& au_end_pos = *au_size; |
| |
| // 1. Considering each protected block in turn, find any emulation prevention |
| // 3 bytes (EP3B) within it, keeping track of their positions. While doing so, |
| // produce a revised Ranges<int> reflecting the new protected block positions |
| // that will apply after we have removed the EP3Bs. |
| Ranges<int> adjusted_protected_blocks; |
| std::vector<int> epbs; |
| int adjustment = 0; |
| for (size_t i = 0; i < protected_blocks.size(); i++) { |
| int start_pos = protected_blocks.start(i); |
| int end_pos = protected_blocks.end(i); |
| int search_pos = start_pos; |
| int epb_pos; |
| int block_adjustment = 0; |
| while ((epb_pos = FindEP3B(au, search_pos, end_pos))) { |
| epbs.push_back(epb_pos); |
| block_adjustment++; |
| search_pos = epb_pos + 2; |
| } |
| // adjust the start_pos and end_pos to accommodate the EPBs that will be |
| // removed. |
| start_pos -= adjustment; |
| adjustment += block_adjustment; |
| end_pos -= adjustment; |
| if (end_pos - start_pos > kSampleAESMaxUnprotectedNALULength) |
| adjusted_protected_blocks.Add(start_pos, end_pos); |
| else |
| VLOG(1) << "Ignoring short protected block of length: " |
| << (end_pos - start_pos); |
| } |
| |
| // 2. If we actually found any EP3Bs, make a copy of the AU and then remove |
| // the EP3Bs in the copy (we can't modify the original). |
| if (adjustment) { |
| result.reset(new uint8_t[au_end_pos]); |
| uint8_t* temp = result.get(); |
| memcpy(temp, au, au_end_pos); |
| for (const auto& epb : base::Reversed(epbs)) { |
| RemoveByte(temp, epb, au_end_pos); |
| au_end_pos--; |
| } |
| au = temp; |
| VLOG(2) << "Copied AU and removed emulation prevention bytes: " |
| << adjustment; |
| } |
| |
| // We now have either the original AU, or a copy with the EP3Bs removed. |
| // We also have an updated Ranges<int> indicating the protected blocks. |
| // Also au_end_pos has been adjusted to indicate the new au_size. |
| |
| // 3. Use a new Ranges<int> to collect all the clear ranges. They will |
| // automatically be coalesced to minimize the number of (disjoint) ranges. |
| Ranges<int> clear_ranges; |
| int previous_pos = 0; |
| for (size_t i = 0; i < adjusted_protected_blocks.size(); i++) { |
| int start_pos = adjusted_protected_blocks.start(i); |
| int end_pos = adjusted_protected_blocks.end(i); |
| // Add the clear range prior to this protected block. |
| clear_ranges.Add(previous_pos, start_pos); |
| int block_size = end_pos - start_pos; |
| DCHECK_GT(block_size, kSampleAESMaxUnprotectedNALULength); |
| // Add the clear leader. |
| clear_ranges.Add(start_pos, start_pos + kSampleAESClearLeaderSize); |
| block_size -= kSampleAESClearLeaderSize; |
| // The bytes beyond an integral multiple of AES blocks (16 bytes) are to be |
| // left clear. Also, if the last 16 bytes would be the only block in a |
| // pattern unit (160 bytes), they are also left clear. |
| int residual_bytes = block_size % kSampleAESPatternUnit; |
| if (residual_bytes > 16) |
| residual_bytes = residual_bytes % 16; |
| clear_ranges.Add(end_pos - residual_bytes, end_pos); |
| previous_pos = end_pos; |
| } |
| // Add the trailing bytes, if any, beyond the last protected block. |
| clear_ranges.Add(previous_pos, au_end_pos); |
| |
| // 4. Convert the disjoint set of clear ranges into subsample entries. Each |
| // subsample entry is a count of clear bytes followed by a count of protected |
| // bytes. |
| subsamples->clear(); |
| for (size_t i = 0; i < clear_ranges.size(); i++) { |
| int start_pos = clear_ranges.start(i); |
| int end_pos = clear_ranges.end(i); |
| int clear_size = end_pos - start_pos; |
| int encrypt_end_pos = au_end_pos; |
| |
| if (i + 1 < clear_ranges.size()) |
| encrypt_end_pos = clear_ranges.start(i + 1); |
| SubsampleEntry subsample(clear_size, encrypt_end_pos - end_pos); |
| subsamples->push_back(subsample); |
| } |
| return result; |
| } |
| |
| } // namespace |
| |
// Minimum size in bytes of an AUD NALU: a 3-byte start code followed by the
// single byte that carries the NALU type.
constexpr int kMinAUDSize = 3 /* start code */ + 1 /* NALU type */;
| |
| EsParserH264::EsParserH264(NewVideoConfigCB new_video_config_cb, |
| EmitBufferCB emit_buffer_cb) |
| : es_adapter_(std::move(new_video_config_cb), std::move(emit_buffer_cb)), |
| h264_parser_(new H264Parser()), |
| current_access_unit_pos_(0), |
| next_access_unit_pos_(0), |
| init_encryption_scheme_(EncryptionScheme::kUnencrypted), |
| get_decrypt_config_cb_() {} |
| |
| EsParserH264::EsParserH264(NewVideoConfigCB new_video_config_cb, |
| EmitBufferCB emit_buffer_cb, |
| EncryptionScheme init_encryption_scheme, |
| const GetDecryptConfigCB& get_decrypt_config_cb) |
| : es_adapter_(std::move(new_video_config_cb), std::move(emit_buffer_cb)), |
| h264_parser_(new H264Parser()), |
| current_access_unit_pos_(0), |
| next_access_unit_pos_(0), |
| init_encryption_scheme_(init_encryption_scheme), |
| get_decrypt_config_cb_(get_decrypt_config_cb) {} |
| |
| EsParserH264::~EsParserH264() = default; |
| |
| void EsParserH264::Flush() { |
| DVLOG(1) << __func__; |
| if (!FindAUD(¤t_access_unit_pos_)) |
| return; |
| |
| // Simulate an additional AUD to force emitting the last access unit |
| // which is assumed to be complete at this point. |
| uint8_t aud[] = {0x00, 0x00, 0x01, 0x09}; |
| |
| // Fail if this AUD's push fails allocation, since otherwise the behavior of |
| // the subsequent parse would vary based on whether or not the system is |
| // near-OOM. |
| // TODO(crbug.com/1266639): Consider plumbing parse failure for this push |
| // failure case, instead of what used to OOM but now instead would fail this |
| // CHECK. |
| CHECK(es_queue_->Push(aud, sizeof(aud))); |
| |
| ParseFromEsQueue(); |
| es_adapter_.Flush(); |
| } |
| |
| void EsParserH264::ResetInternal() { |
| DVLOG(1) << __func__; |
| h264_parser_.reset(new H264Parser()); |
| current_access_unit_pos_ = 0; |
| next_access_unit_pos_ = 0; |
| last_video_decoder_config_ = VideoDecoderConfig(); |
| es_adapter_.Reset(); |
| } |
| |
| bool EsParserH264::FindAUD(int64_t* stream_pos) { |
| while (true) { |
| const uint8_t* es; |
| int size; |
| es_queue_->PeekAt(*stream_pos, &es, &size); |
| |
| // Find a start code and move the stream to the start code parser position. |
| off_t start_code_offset; |
| off_t start_code_size; |
| bool start_code_found = H264Parser::FindStartCode( |
| es, size, &start_code_offset, &start_code_size); |
| *stream_pos += start_code_offset; |
| |
| // No H264 start code found or NALU type not available yet. |
| if (!start_code_found || start_code_offset + start_code_size >= size) |
| return false; |
| |
| // Exit the parser loop when an AUD is found. |
| // Note: NALU header for an AUD: |
| // - nal_ref_idc must be 0 |
| // - nal_unit_type must be H264NALU::kAUD |
| if (es[start_code_offset + start_code_size] == H264NALU::kAUD) |
| break; |
| |
| // The current NALU is not an AUD, skip the start code |
| // and continue parsing the stream. |
| *stream_pos += start_code_size; |
| } |
| |
| return true; |
| } |
| |
| bool EsParserH264::ParseFromEsQueue() { |
| DCHECK_LE(es_queue_->head(), current_access_unit_pos_); |
| DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_); |
| DCHECK_LE(next_access_unit_pos_, es_queue_->tail()); |
| |
| // Find the next AUD located at or after |current_access_unit_pos_|. This is |
| // needed since initially |current_access_unit_pos_| might not point to |
| // an AUD. |
| // Discard all the data before the updated |current_access_unit_pos_| |
| // since it won't be used again. |
| bool aud_found = FindAUD(¤t_access_unit_pos_); |
| es_queue_->Trim(current_access_unit_pos_); |
| if (next_access_unit_pos_ < current_access_unit_pos_) |
| next_access_unit_pos_ = current_access_unit_pos_; |
| |
| // Resume parsing later if no AUD was found. |
| if (!aud_found) |
| return true; |
| |
| // Find the next AUD to make sure we have a complete access unit. |
| if (next_access_unit_pos_ < current_access_unit_pos_ + kMinAUDSize) { |
| next_access_unit_pos_ = current_access_unit_pos_ + kMinAUDSize; |
| DCHECK_LE(next_access_unit_pos_, es_queue_->tail()); |
| } |
| if (!FindAUD(&next_access_unit_pos_)) |
| return true; |
| |
| // At this point, we know we have a full access unit. |
| bool is_key_frame = false; |
| int pps_id_for_access_unit = -1; |
| |
| const uint8_t* es; |
| int size; |
| es_queue_->PeekAt(current_access_unit_pos_, &es, &size); |
| int access_unit_size = base::checked_cast<int>( |
| next_access_unit_pos_ - current_access_unit_pos_); |
| DCHECK_LE(access_unit_size, size); |
| h264_parser_->SetStream(es, access_unit_size); |
| |
| while (true) { |
| bool is_eos = false; |
| H264NALU nalu; |
| switch (h264_parser_->AdvanceToNextNALU(&nalu)) { |
| case H264Parser::kOk: |
| break; |
| case H264Parser::kInvalidStream: |
| case H264Parser::kUnsupportedStream: |
| return false; |
| case H264Parser::kEOStream: |
| is_eos = true; |
| break; |
| } |
| if (is_eos) |
| break; |
| |
| switch (nalu.nal_unit_type) { |
| case H264NALU::kAUD: { |
| DVLOG(LOG_LEVEL_ES) << "NALU: AUD"; |
| break; |
| } |
| case H264NALU::kSPS: { |
| DVLOG(LOG_LEVEL_ES) << "NALU: SPS"; |
| int sps_id; |
| if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk) |
| return false; |
| break; |
| } |
| case H264NALU::kPPS: { |
| DVLOG(LOG_LEVEL_ES) << "NALU: PPS"; |
| int pps_id; |
| if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk) { |
| // Allow PPS parsing to fail if SPS have not been parsed yet, |
| // since it is possible to have a PPS before SPS in the stream. |
| if (last_video_decoder_config_.IsValidConfig()) |
| return false; |
| } |
| break; |
| } |
| case H264NALU::kIDRSlice: |
| case H264NALU::kNonIDRSlice: { |
| is_key_frame = (nalu.nal_unit_type == H264NALU::kIDRSlice); |
| DVLOG(LOG_LEVEL_ES) << "NALU: slice IDR=" << is_key_frame; |
| H264SliceHeader shdr; |
| if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) { |
| // Only accept an invalid SPS/PPS at the beginning when the stream |
| // does not necessarily start with an SPS/PPS/IDR. |
| // TODO(damienv): Should be able to differentiate a missing SPS/PPS |
| // from a slice header parsing error. |
| if (last_video_decoder_config_.IsValidConfig()) |
| return false; |
| } else { |
| pps_id_for_access_unit = shdr.pic_parameter_set_id; |
| } |
| // With HLS SampleAES, protected blocks in H.264 consist of IDR and non- |
| // IDR slices that are more than 48 bytes in length. |
| if (get_decrypt_config_cb_ && get_decrypt_config_cb_.Run() && |
| nalu.size > kSampleAESMaxUnprotectedNALULength) { |
| int64_t nal_begin = nalu.data - es; |
| protected_blocks_.Add(nal_begin, nal_begin + nalu.size); |
| } |
| break; |
| } |
| default: { |
| DVLOG(LOG_LEVEL_ES) << "NALU: " << nalu.nal_unit_type; |
| } |
| } |
| } |
| |
| // Emit a frame and move the stream to the next AUD position. |
| RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size, |
| is_key_frame, pps_id_for_access_unit)); |
| current_access_unit_pos_ = next_access_unit_pos_; |
| es_queue_->Trim(current_access_unit_pos_); |
| |
| return true; |
| } |
| |
| bool EsParserH264::EmitFrame(int64_t access_unit_pos, |
| int access_unit_size, |
| bool is_key_frame, |
| int pps_id) { |
| // Get the access unit timing info. |
| // Note: |current_timing_desc.pts| might be |kNoTimestamp| at this point |
| // if: |
| // - the stream is not fully MPEG-2 compliant. |
| // - or if the stream relies on H264 VUI parameters to compute the timestamps. |
| // See H.222 spec: section 2.7.5 "Conditional coding of timestamps". |
| // This part is not yet implemented in EsParserH264. |
| // |es_adapter_| will take care of the missing timestamps. |
| TimingDesc current_timing_desc = GetTimingDescriptor(access_unit_pos); |
| DVLOG_IF(1, current_timing_desc.pts == kNoTimestamp) << "Missing timestamp"; |
| |
| // If only the PTS is provided, copy the PTS into the DTS. |
| if (current_timing_desc.dts == kNoDecodeTimestamp) { |
| current_timing_desc.dts = |
| DecodeTimestamp::FromPresentationTime(current_timing_desc.pts); |
| } |
| |
| // Update the video decoder configuration if needed. |
| const H264PPS* pps = h264_parser_->GetPPS(pps_id); |
| if (!pps) { |
| // Only accept an invalid PPS at the beginning when the stream |
| // does not necessarily start with an SPS/PPS/IDR. |
| // In this case, the initial frames are conveyed to the upper layer with |
| // an invalid VideoDecoderConfig and it's up to the upper layer |
| // to process this kind of frame accordingly. |
| if (last_video_decoder_config_.IsValidConfig()) |
| return false; |
| } else { |
| const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id); |
| if (!sps) |
| return false; |
| RCHECK(UpdateVideoDecoderConfig(sps, init_encryption_scheme_)); |
| } |
| |
| // Emit a frame. |
| DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_ |
| << " size=" << access_unit_size; |
| int es_size; |
| const uint8_t* es; |
| es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size); |
| CHECK_GE(es_size, access_unit_size); |
| |
| const DecryptConfig* base_decrypt_config = nullptr; |
| if (get_decrypt_config_cb_) |
| base_decrypt_config = get_decrypt_config_cb_.Run(); |
| |
| std::unique_ptr<uint8_t[]> adjusted_au; |
| std::vector<SubsampleEntry> subsamples; |
| if (base_decrypt_config) { |
| adjusted_au = AdjustAUForSampleAES(es, &access_unit_size, protected_blocks_, |
| &subsamples); |
| protected_blocks_.clear(); |
| if (adjusted_au) |
| es = adjusted_au.get(); |
| } |
| |
| // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId |
| // type and allow multiple video tracks. See https://crbug.com/341581. |
| scoped_refptr<StreamParserBuffer> stream_parser_buffer = |
| StreamParserBuffer::CopyFrom(es, access_unit_size, is_key_frame, |
| DemuxerStream::VIDEO, kMp2tVideoTrackId); |
| stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); |
| stream_parser_buffer->set_timestamp(current_timing_desc.pts); |
| if (base_decrypt_config) { |
| switch (base_decrypt_config->encryption_scheme()) { |
| case EncryptionScheme::kUnencrypted: |
| // As |base_decrypt_config| is specified, the stream is encrypted, |
| // so this shouldn't happen. |
| NOTREACHED(); |
| break; |
| case EncryptionScheme::kCenc: |
| stream_parser_buffer->set_decrypt_config( |
| DecryptConfig::CreateCencConfig(base_decrypt_config->key_id(), |
| base_decrypt_config->iv(), |
| subsamples)); |
| break; |
| case EncryptionScheme::kCbcs: |
| // Note that for SampleAES the (encrypt,skip) pattern is constant. |
| // If not specified in |base_decrypt_config|, use default values. |
| stream_parser_buffer->set_decrypt_config( |
| DecryptConfig::CreateCbcsConfig( |
| base_decrypt_config->key_id(), base_decrypt_config->iv(), |
| subsamples, |
| EncryptionPattern(kSampleAESEncryptBlocks, |
| kSampleAESSkipBlocks))); |
| break; |
| } |
| } |
| return es_adapter_.OnNewBuffer(stream_parser_buffer); |
| } |
| |
| bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps, |
| EncryptionScheme scheme) { |
| // Set the SAR to 1 when not specified in the H264 stream. |
| int sar_width = (sps->sar_width == 0) ? 1 : sps->sar_width; |
| int sar_height = (sps->sar_height == 0) ? 1 : sps->sar_height; |
| |
| absl::optional<gfx::Size> coded_size = sps->GetCodedSize(); |
| if (!coded_size) |
| return false; |
| |
| absl::optional<gfx::Rect> visible_rect = sps->GetVisibleRect(); |
| if (!visible_rect) |
| return false; |
| |
| if (visible_rect->width() > std::numeric_limits<int>::max() / sar_width) { |
| DVLOG(1) << "Integer overflow detected: visible_rect.width()=" |
| << visible_rect->width() << " sar_width=" << sar_width; |
| return false; |
| } |
| gfx::Size natural_size((visible_rect->width() * sar_width) / sar_height, |
| visible_rect->height()); |
| if (natural_size.width() == 0) |
| return false; |
| |
| VideoCodecProfile profile = |
| H264Parser::ProfileIDCToVideoCodecProfile(sps->profile_idc); |
| if (profile == VIDEO_CODEC_PROFILE_UNKNOWN) { |
| DVLOG(1) << "Unrecognized SPS profile_idc 0x" << std::hex |
| << sps->profile_idc; |
| return false; |
| } |
| |
| VideoDecoderConfig video_decoder_config( |
| VideoCodec::kH264, profile, VideoDecoderConfig::AlphaMode::kIsOpaque, |
| VideoColorSpace::REC709(), kNoTransformation, coded_size.value(), |
| visible_rect.value(), natural_size, EmptyExtraData(), scheme); |
| |
| if (!video_decoder_config.IsValidConfig()) { |
| DVLOG(1) << "Invalid video config: " |
| << video_decoder_config.AsHumanReadableString(); |
| return false; |
| } |
| |
| if (!video_decoder_config.Matches(last_video_decoder_config_)) { |
| DVLOG(1) << "Profile IDC: " << sps->profile_idc; |
| DVLOG(1) << "Level IDC: " << sps->level_idc; |
| DVLOG(1) << "Pic width: " << coded_size->width(); |
| DVLOG(1) << "Pic height: " << coded_size->height(); |
| DVLOG(1) << "log2_max_frame_num_minus4: " |
| << sps->log2_max_frame_num_minus4; |
| DVLOG(1) << "SAR: width=" << sps->sar_width |
| << " height=" << sps->sar_height; |
| last_video_decoder_config_ = video_decoder_config; |
| es_adapter_.OnConfigChanged(video_decoder_config); |
| } |
| |
| return true; |
| } |
| |
| } // namespace mp2t |
| } // namespace media |