stream-deploy/ZLM/3rdpart/media-server/librtp/payload/rtp-av1-pack.c

340 lines
9.5 KiB
C++

// https://aomediacodec.github.io/av1-rtp-spec/
// 7.1. Media Type Definition: video/av1
#include "rtp-packet.h"
#include "rtp-profile.h"
#include "rtp-payload-internal.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
// Timestamp: The RTP timestamp indicates the time when the input frame was sampled, at a clock rate of 90 kHz
#define KHz 90 // 90000Hz
#define N_AV1_HEADER 1
#define OBU_SEQUENCE_HEADER 1
#define OBU_TEMPORAL_DELIMITER 2
#define OBU_FRAME_HEADER 3
#define OBU_TILE_GROUP 4
#define OBU_METADATA 5
#define OBU_FRAME 6
#define OBU_REDUNDANT_FRAME_HEADER 7
#define OBU_TILE_LIST 8
#define AV1_AGGREGATION_HEADER_Z 0x80 // set to 1 if the first OBU element is an OBU fragment that is a continuation of an OBU fragment from the previous packet, 0 otherwise.
#define AV1_AGGREGATION_HEADER_Y 0x40 // set to 1 if the last OBU element is an OBU fragment that will continue in the next packet, 0 otherwise.
#define AV1_AGGREGATION_HEADER_N 0x08 // set to 1 if the packet is the first packet of a coded video sequence, 0 otherwise. Note: if N equals 1 then Z must equal 0.
struct rtp_encode_av1_t
{
struct rtp_packet_t pkt;
struct rtp_payload_t handler;
void* cbparam;
int size;
uint8_t* ptr;
int offset;
uint8_t aggregation;
};
static inline const uint8_t* leb128(const uint8_t* data, size_t bytes, uint64_t* size)
{
size_t i;
for (*size = i = 0; i * 7 < 64 && i < bytes;)
{
*size |= ((uint64_t)(data[i] & 0x7F)) << (i * 7);
if (0 == (data[i++] & 0x80))
break;
}
return data + i;
}
static inline uint8_t* leb128_write(int64_t size, uint8_t* data, size_t bytes)
{
size_t i;
for (i = 0; i * 7 < 64 && i < bytes;)
{
data[i] = (uint8_t)(size & 0x7F);
size >>= 7;
data[i++] |= size > 0 ? 0x80 : 0;
if (0 == size)
break;
}
return data + i;
}
static void* rtp_av1_pack_create(int size, uint8_t pt, uint16_t seq, uint32_t ssrc, struct rtp_payload_t *handler, void* cbparam)
{
struct rtp_encode_av1_t *packer;
packer = (struct rtp_encode_av1_t *)calloc(1, sizeof(*packer));
if (!packer) return NULL;
memcpy(&packer->handler, handler, sizeof(packer->handler));
packer->cbparam = cbparam;
packer->size = size;
packer->pkt.rtp.v = RTP_VERSION;
packer->pkt.rtp.pt = pt;
packer->pkt.rtp.seq = seq;
packer->pkt.rtp.ssrc = ssrc;
return packer;
}
static void rtp_av1_pack_destroy(void* pack)
{
struct rtp_encode_av1_t *packer;
packer = (struct rtp_encode_av1_t *)pack;
#if defined(_DEBUG) || defined(DEBUG)
memset(packer, 0xCC, sizeof(*packer));
#endif
free(packer);
}
static void rtp_av1_pack_get_info(void* pack, uint16_t* seq, uint32_t* timestamp)
{
struct rtp_encode_av1_t *packer;
packer = (struct rtp_encode_av1_t *)pack;
*seq = (uint16_t)packer->pkt.rtp.seq;
*timestamp = packer->pkt.rtp.timestamp;
}
static int rtp_av1_pack_flush(struct rtp_encode_av1_t *packer, uint8_t aggregation)
{
int r, n;
if (!packer->ptr || packer->offset <= RTP_FIXED_HEADER)
return 0; // nothing to send
packer->ptr[RTP_FIXED_HEADER] = aggregation;
packer->pkt.payloadlen = packer->offset - RTP_FIXED_HEADER;
n = rtp_packet_serialize_header(&packer->pkt, packer->ptr, packer->size);
if (n != RTP_FIXED_HEADER)
{
assert(0);
return -1;
}
++packer->pkt.rtp.seq;
packer->pkt.rtp.m = 0; // clear marker bit
packer->aggregation &= ~(AV1_AGGREGATION_HEADER_N | AV1_AGGREGATION_HEADER_Z);
r = packer->handler.packet(packer->cbparam, packer->ptr, n + packer->pkt.payloadlen, packer->pkt.rtp.timestamp, 0);
packer->handler.free(packer->cbparam, packer->ptr);
packer->offset = 0;
packer->ptr = NULL;
return r;
}
static int rtp_av1_pack_obu(struct rtp_encode_av1_t *packer, const uint8_t* obu, int64_t bytes)
{
int r;
int64_t n;
uint8_t* ptr, *end;
while (bytes > 0)
{
if (NULL == packer->ptr)
{
packer->ptr = (uint8_t*)packer->handler.alloc(packer->cbparam, packer->size);
if (!packer->ptr)
return -ENOMEM;
packer->offset = RTP_FIXED_HEADER + 1; // RTP Header + AV1 aggregation header
}
ptr = packer->ptr + packer->offset;
end = packer->ptr + packer->size;
// OBU element size
assert(packer->size < 0x3FFF); // 14bits
if (ptr + bytes + ((bytes > 0x7F) ? 2 : 1) > end)
n = end - ptr - 2;
else
n = bytes;
ptr = leb128_write(n, ptr, end - ptr);
memcpy(ptr, obu, (size_t)n);
ptr += n;
obu += n;
bytes -= n;
packer->offset = (int)(ptr - packer->ptr);
if (packer->size - packer->offset < 8)
{
r = rtp_av1_pack_flush(packer, packer->aggregation | (bytes > 0 ? AV1_AGGREGATION_HEADER_Y : 0));
if (0 != r) return r;
}
if (bytes > 0)
packer->aggregation |= AV1_AGGREGATION_HEADER_Z;
}
return 0;
}
/// https://aomediacodec.github.io/av1-spec/av1-spec.pd
/// Annex B: Length delimited bitstream format
/// @param[in] data temporal_unit
/// @param[in] bytes temporal_unit_sizetemporal_unit_size
static int rtp_av1_pack_input_annexb(void* pack, const void* data, int bytes, uint32_t timestamp)
{
int r;
// uint8_t obu_has_size_field;
uint8_t obu_extension_flag;
uint8_t temporal_id, temporal_id0;
uint8_t spatial_id, spatial_id0;
uint8_t obu_type;
uint64_t obu_size, frame_size;
const uint8_t *ptr, *end, *frame_end, *obu_end;
struct rtp_encode_av1_t *packer;
packer = (struct rtp_encode_av1_t *)pack;
packer->pkt.rtp.timestamp = timestamp;
packer->pkt.rtp.m = 0;
packer->ptr = NULL; // TODO: ptr memory leak
temporal_id0 = spatial_id0 = 0;
ptr = (const uint8_t *)data;
end = ptr + bytes;
for (packer->aggregation = AV1_AGGREGATION_HEADER_N; ptr < end; ptr = frame_end)
{
ptr = leb128(ptr, end - ptr, &frame_size);
frame_end = ptr + frame_size;
if (frame_end > end)
{
assert(0);
return -1;
}
for (; ptr < frame_end; ptr = obu_end)
{
ptr = leb128(ptr, bytes, &obu_size);
obu_end = ptr + obu_size;
if (obu_end > frame_end)
{
assert(0);
return -1;
}
obu_type = (*ptr >> 3) & 0x0F;
obu_extension_flag = *ptr & 0x04;
//obu_has_size_field = *ptr & 0x02;
if (obu_extension_flag)
{
temporal_id = (ptr[1] >> 5) & 0x07;
spatial_id = (ptr[1] >> 3) & 0x03;
// If more than one OBU contained in an RTP packet has an OBU extension header
// then the values of the temporal_id and spatial_id must be the same in all such
// OBUs in the RTP packet.
if (temporal_id != temporal_id0 || spatial_id != spatial_id0)
{
r = rtp_av1_pack_flush(packer, packer->aggregation);
if (0 != r) return r;
temporal_id0 = temporal_id;
spatial_id0 = spatial_id;
}
}
// 5. Packetization rules
// The temporal delimiter OBU, if present, SHOULD be removed
// when transmitting, and MUST be ignored by receivers.
if (OBU_TEMPORAL_DELIMITER == obu_type)
continue;
if (0 != rtp_av1_pack_obu(packer, ptr, obu_size))
return -ENOMEM;
}
}
// The RTP header Marker bit MUST be set equal to 0 if the packet is not the last
// packet of the temporal unit, it SHOULD be set equal to 1 otherwise.
// Note: It is possible for a receiver to receive the last packet of a temporal unit
// without the marker bit being set equal to 1, and a receiver should be able to handle
// this case. The last packet of a temporal unit is also indicated by the next packet,
// in RTP sequence number order, having an incremented timestamp.
packer->pkt.rtp.m = 1;
return rtp_av1_pack_flush(packer, packer->aggregation);
}
/// http://aomedia.org/av1/specification/syntax/#general-obu-syntax
/// Low overhead bitstream format
static int rtp_av1_pack_input_obu(void* pack, const void* data, int bytes, uint32_t timestamp)
{
int r;
size_t i;
size_t offset;
uint64_t len;
uint8_t obu_type;
const uint8_t* ptr, *raw;
struct rtp_encode_av1_t* packer;
packer = (struct rtp_encode_av1_t*)pack;
packer->pkt.rtp.timestamp = timestamp;
packer->pkt.rtp.m = 0;
packer->ptr = NULL; // TODO: ptr memory leak
packer->aggregation = 0;
raw = (const uint8_t*)data;
for (i = r = 0; i < bytes && 0 == r; i += (size_t)len)
{
// http://aomedia.org/av1/specification/syntax/#obu-header-syntax
obu_type = (raw[i] >> 3) & 0x0F;
if (raw[i] & 0x04) // obu_extension_flag
{
// http://aomedia.org/av1/specification/syntax/#obu-extension-header-syntax
// temporal_id = (obu[1] >> 5) & 0x07;
// spatial_id = (obu[1] >> 3) & 0x03;
offset = 2;
}
else
{
offset = 1;
}
if (raw[i] & 0x02) // obu_has_size_field
{
ptr = leb128(raw + i + offset, (int)(bytes - i - offset), &len);
if (ptr + len > raw + bytes)
return -1;
len += ptr - raw - i;
}
else
{
len = bytes - i;
}
// 5. Packetization rules
// The temporal delimiter OBU, if present, SHOULD be removed
// when transmitting, and MUST be ignored by receivers.
if (OBU_TEMPORAL_DELIMITER == obu_type)
continue;
packer->aggregation |= OBU_SEQUENCE_HEADER == obu_type ? AV1_AGGREGATION_HEADER_N : 0;
r = rtp_av1_pack_obu(packer, raw + i, (size_t)len);
}
// The RTP header Marker bit MUST be set equal to 0 if the packet is not the last
// packet of the temporal unit, it SHOULD be set equal to 1 otherwise.
// Note: It is possible for a receiver to receive the last packet of a temporal unit
// without the marker bit being set equal to 1, and a receiver should be able to handle
// this case. The last packet of a temporal unit is also indicated by the next packet,
// in RTP sequence number order, having an incremented timestamp.
packer->pkt.rtp.m = 1;
return rtp_av1_pack_flush(packer, packer->aggregation);
}
struct rtp_payload_encode_t *rtp_av1_encode()
{
static struct rtp_payload_encode_t encode = {
rtp_av1_pack_create,
rtp_av1_pack_destroy,
rtp_av1_pack_get_info,
rtp_av1_pack_input_obu,
};
return &encode;
}