321 lines
10 KiB
C++
321 lines
10 KiB
C++
/*
|
||
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
|
||
*
|
||
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
|
||
*
|
||
* Use of this source code is governed by MIT-like license that can be found in the
|
||
* LICENSE file in the root of the source tree. All contributing project authors
|
||
* may be found in the AUTHORS file in the root of the source tree.
|
||
*/
|
||
|
||
#include "VP9Rtp.h"
|
||
#include "Extension/Frame.h"
|
||
#include "Common/config.h"
|
||
|
||
namespace mediakit{
|
||
|
||
const int16_t kNoPictureId = -1;
|
||
const int8_t kNoTl0PicIdx = -1;
|
||
const uint8_t kNoTemporalIdx = 0xFF;
|
||
const int kNoKeyIdx = -1;
|
||
|
||
struct VP9ResolutionLayer {
|
||
int width;
|
||
int height;
|
||
};
|
||
|
||
struct RTPPayloadVP9 {
|
||
bool hasPictureID = false;
|
||
bool interPicturePrediction = false;
|
||
bool hasLayerIndices = false;
|
||
bool flexibleMode = false;
|
||
bool beginningOfLayerFrame = false;
|
||
bool endingOfLayerFrame = false;
|
||
bool hasScalabilityStructure = false;
|
||
bool largePictureID = false;
|
||
int pictureID = -1;
|
||
int temporalID = -1;
|
||
bool isSwitchingUp = false;
|
||
int spatialID = -1;
|
||
bool isInterLayeredDepUsed = false;
|
||
int tl0PicIdx = -1;
|
||
int referenceIdx = -1;
|
||
bool additionalReferenceIdx = false;
|
||
int spatialLayers = -1;
|
||
bool hasResolution = false;
|
||
bool hasGof = false;
|
||
int numberOfFramesInGof = -1;
|
||
std::vector<VP9ResolutionLayer> resolutions;
|
||
int parse(unsigned char* data, int dataLength);
|
||
bool keyFrame() const { return beginningOfLayerFrame && !interPicturePrediction; }
|
||
std::string dump() const {
|
||
char line[64] = {0};
|
||
snprintf(line, sizeof(line), "%c%c%c%c%c%c%c- %d %d, %d %d",
|
||
hasPictureID ? 'I' : ' ',
|
||
interPicturePrediction ? 'P' : ' ',
|
||
hasLayerIndices ? 'L' : ' ',
|
||
flexibleMode ? 'F' : ' ',
|
||
beginningOfLayerFrame ? 'B' : ' ',
|
||
endingOfLayerFrame ? 'E' : ' ',
|
||
hasScalabilityStructure ? 'V' : ' ',
|
||
pictureID, tl0PicIdx,
|
||
spatialID, temporalID);
|
||
return line;
|
||
}
|
||
};
|
||
//
|
||
// VP9 format:
|
||
//
|
||
// Payload descriptor (Flexible mode F = 1)
|
||
// 0 1 2 3 4 5 6 7
|
||
// +-+-+-+-+-+-+-+-+
|
||
// |I|P|L|F|B|E|V|-| (REQUIRED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// I: |M| PICTURE ID | (REQUIRED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// M: | EXTENDED PID | (RECOMMENDED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
|
||
// +-+-+-+-+-+-+-+-+ -
|
||
// P,F: | P_DIFF |N| (CONDITIONALLY REQUIRED) - up to 3 times
|
||
// +-+-+-+-+-+-+-+-+ -
|
||
// V: | SS |
|
||
// | .. |
|
||
// +-+-+-+-+-+-+-+-+
|
||
//
|
||
// Payload descriptor (Non flexible mode F = 0)
|
||
//
|
||
// 0 1 2 3 4 5 6 7
|
||
// +-+-+-+-+-+-+-+-+
|
||
// |I|P|L|F|B|E|V|-| (REQUIRED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// I: |M| PICTURE ID | (RECOMMENDED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// M: | EXTENDED PID | (RECOMMENDED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
|
||
// +-+-+-+-+-+-+-+-+
|
||
// V: | SS |
|
||
// | .. |
|
||
// +-+-+-+-+-+-+-+-+
|
||
#define kIBit 0x80
|
||
#define kPBit 0x40
|
||
#define kLBit 0x20
|
||
#define kFBit 0x10
|
||
#define kBBit 0x08
|
||
#define kEBit 0x04
|
||
#define kVBit 0x02
|
||
int RTPPayloadVP9::parse(unsigned char *data, int dataLength) {
|
||
const unsigned char* dataPtr = data;
|
||
// Parse mandatory first byte of payload descriptor
|
||
this->hasPictureID = (*dataPtr & kIBit); // I bit
|
||
this->interPicturePrediction = (*dataPtr & kPBit); // P bit
|
||
this->hasLayerIndices = (*dataPtr & kLBit); // L bit
|
||
this->flexibleMode = (*dataPtr & kFBit); // F bit
|
||
this->beginningOfLayerFrame = (*dataPtr & kBBit); // B bit
|
||
this->endingOfLayerFrame = (*dataPtr & kEBit); // E bit
|
||
this->hasScalabilityStructure = (*dataPtr & kVBit); // V bit
|
||
dataPtr++;
|
||
|
||
if (this->hasPictureID) {
|
||
this->largePictureID = (*dataPtr & 0x80); // M bit
|
||
this->pictureID = (*dataPtr & 0x7F);
|
||
if (this->largePictureID) {
|
||
dataPtr++;
|
||
this->pictureID = ntohs((this->pictureID << 16) + (*dataPtr & 0xFF));
|
||
}
|
||
dataPtr++;
|
||
}
|
||
|
||
if (this->hasLayerIndices) {
|
||
this->temporalID = (*dataPtr & 0xE0) >> 5; // T bits
|
||
this->isSwitchingUp = (*dataPtr & 0x10); // U bit
|
||
this->spatialID = (*dataPtr & 0x0E) >> 1; // S bits
|
||
this->isInterLayeredDepUsed = (*dataPtr & 0x01); // D bit
|
||
if (this->flexibleMode) { // marked in webrtc code
|
||
do {
|
||
dataPtr++;
|
||
this->referenceIdx = (*dataPtr & 0xFE) >> 1;
|
||
this->additionalReferenceIdx = (*dataPtr & 0x01); // D bit
|
||
} while (this->additionalReferenceIdx);
|
||
} else {
|
||
dataPtr++;
|
||
this->tl0PicIdx = (*dataPtr & 0xFF);
|
||
}
|
||
dataPtr++;
|
||
}
|
||
|
||
if (this->flexibleMode && this->interPicturePrediction) {
|
||
/* Skip reference indices */
|
||
uint8_t nbit;
|
||
do {
|
||
uint8_t p_diff = (*dataPtr & 0xFE) >> 1;
|
||
nbit = (*dataPtr & 0x01);
|
||
dataPtr++;
|
||
} while (nbit);
|
||
}
|
||
if (this->hasScalabilityStructure) {
|
||
this->spatialLayers = (*dataPtr & 0xE0) >> 5; // N_S bits
|
||
this->hasResolution = (*dataPtr & 0x10); // Y bit
|
||
this->hasGof = (*dataPtr & 0x08); // G bit
|
||
dataPtr++;
|
||
if (this->hasResolution) {
|
||
for (int i = 0; i <= this->spatialLayers; i++) {
|
||
int width = (dataPtr[0] << 8) + dataPtr[1];
|
||
dataPtr += 2;
|
||
int height = (dataPtr[0] << 8) + dataPtr[1];
|
||
dataPtr += 2;
|
||
// InfoL << "got vp9 " << width << "x" << height;
|
||
this->resolutions.push_back({ width, height });
|
||
}
|
||
}
|
||
if (this->hasGof) {
|
||
this->numberOfFramesInGof = *dataPtr & 0xFF; // N_G bits
|
||
dataPtr++;
|
||
for (int frame_index = 0; frame_index < this->numberOfFramesInGof; frame_index++) {
|
||
// TODO(javierc): Read these values if needed
|
||
int reference_indices = (*dataPtr & 0x0C) >> 2; // R bits
|
||
dataPtr++;
|
||
for (int reference_index = 0; reference_index < reference_indices; reference_index++) {
|
||
dataPtr++;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return dataPtr - data;
|
||
}
|
||
|
||
|
||
////////////////////////////////////////////////////
|
||
VP9RtpDecoder::VP9RtpDecoder() {
|
||
obtainFrame();
|
||
}
|
||
|
||
void VP9RtpDecoder::obtainFrame() {
|
||
_frame = FrameImp::create<VP9Frame>();
|
||
}
|
||
|
||
bool VP9RtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
|
||
auto seq = rtp->getSeq();
|
||
bool is_gop = decodeRtp(rtp);
|
||
if (!_gop_dropped && seq != (uint16_t)(_last_seq + 1) && _last_seq) {
|
||
_gop_dropped = true;
|
||
WarnL << "start drop VP9 gop, last seq:" << _last_seq << ", rtp:\r\n" << rtp->dumpString();
|
||
}
|
||
_last_seq = seq;
|
||
return is_gop;
|
||
}
|
||
|
||
bool VP9RtpDecoder::decodeRtp(const RtpPacket::Ptr &rtp) {
|
||
auto payload_size = rtp->getPayloadSize();
|
||
if (payload_size < 1) {
|
||
// No actual payload
|
||
return false;
|
||
}
|
||
auto payload = rtp->getPayload();
|
||
auto stamp = rtp->getStampMS();
|
||
auto seq = rtp->getSeq();
|
||
|
||
RTPPayloadVP9 info;
|
||
int offset = info.parse(payload, payload_size);
|
||
// InfoL << rtp->dumpString() << "\n" << info.dump();
|
||
bool start = info.beginningOfLayerFrame;
|
||
if (start) {
|
||
_frame->_pts = stamp;
|
||
_frame->_buffer.clear();
|
||
_frame_drop = false;
|
||
}
|
||
|
||
if (_frame_drop) {
|
||
// This frame is incomplete
|
||
return false;
|
||
}
|
||
|
||
if (!start && seq != (uint16_t)(_last_seq + 1)) {
|
||
// 中间的或末尾的rtp包,其seq必须连续,否则说明rtp丢包,那么该帧不完整,必须得丢弃
|
||
_frame_drop = true;
|
||
_frame->_buffer.clear();
|
||
return false;
|
||
}
|
||
// Append data
|
||
_frame->_buffer.append((char *)payload + offset, payload_size - offset);
|
||
if (info.endingOfLayerFrame) { // rtp->getHeader()->mark
|
||
// 确保下一个包必须是beginningOfLayerFrame
|
||
_frame_drop = true;
|
||
// 该帧最后一个rtp包,输出frame
|
||
outputFrame(rtp);
|
||
}
|
||
return info.keyFrame();
|
||
}
|
||
|
||
void VP9RtpDecoder::outputFrame(const RtpPacket::Ptr &rtp) {
|
||
if (_frame->dropAble()) {
|
||
// 不参与dts生成 [AUTO-TRANSLATED:dff3b747]
|
||
// Not involved in dts generation
|
||
_frame->_dts = _frame->_pts;
|
||
} else {
|
||
// rtsp没有dts,那么根据pts排序算法生成dts [AUTO-TRANSLATED:f37c17f3]
|
||
// Rtsp does not have dts, so dts is generated according to the pts sorting algorithm
|
||
_dts_generator.getDts(_frame->_pts, _frame->_dts);
|
||
}
|
||
|
||
if (_frame->keyFrame() && _gop_dropped) {
|
||
_gop_dropped = false;
|
||
InfoL << "new gop received, rtp:\r\n" << rtp->dumpString();
|
||
}
|
||
if (!_gop_dropped || _frame->configFrame()) {
|
||
// InfoL << _frame->pts() << " size=" << _frame->size();
|
||
RtpCodec::inputFrame(_frame);
|
||
}
|
||
obtainFrame();
|
||
}
|
||
|
||
|
||
////////////////////////////////////////////////////////////////////////
|
||
|
||
bool VP9RtpEncoder::inputFrame(const Frame::Ptr &frame) {
|
||
uint8_t header[20] = { 0 };
|
||
int nheader = 1;
|
||
header[0] = kBBit;
|
||
bool key = frame->keyFrame();
|
||
if (!key)
|
||
header[0] |= kPBit;
|
||
#if 1
|
||
header[0] |= kIBit;
|
||
if (++_pic_id > 0x7FFF) {
|
||
_pic_id = 0;
|
||
}
|
||
header[1] = (0x80 | ((_pic_id >> 8) & 0x7F));
|
||
header[2] = (_pic_id & 0xFF);
|
||
nheader += 2;
|
||
#endif
|
||
const char *ptr = frame->data() + frame->prefixSize();
|
||
int len = frame->size() - frame->prefixSize();
|
||
int pdu_size = getRtpInfo().getMaxSize() - nheader;
|
||
|
||
bool mark = false;
|
||
for (int pos = 0; pos < len; pos += pdu_size) {
|
||
if (len - pos <= pdu_size) {
|
||
pdu_size = len - pos;
|
||
header[0] |= kEBit;
|
||
mark = true;
|
||
}
|
||
|
||
auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, pdu_size + nheader, mark, frame->pts());
|
||
if (rtp) {
|
||
uint8_t *payload = rtp->getPayload();
|
||
memcpy(payload, header, nheader);
|
||
memcpy(payload + nheader, ptr + pos, pdu_size);
|
||
RtpCodec::inputRtp(rtp, key);
|
||
}
|
||
key = false;
|
||
header[0] &= (~kBBit); // Clear 'Begin of partition' bit.
|
||
}
|
||
return true;
|
||
}
|
||
|
||
} // namespace mediakit
|