I'm currently writing software that transcodes media files using the ffmpeg libraries. The problem is that, with H264, QuickTime cannot play the resulting stream and shows a black screen; the audio stream works as expected. QuickTime can only handle the yuv420p pixel format, and that is the format the encoded video actually uses.
I have looked through the ffmpeg examples and the ffmpeg source code, but I cannot find where the problem is. Any help is appreciated.
The only thing I could get out of QuickTime is a SeqAndPicParamSetFromCFDictionaryRef, bad config record message in the console. The same message is logged by AVPlayer from AVFoundation.
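That error message points at the H.264 decoder configuration record (the avcC box that MP4 builds from the stream's extradata), so one way to narrow the problem down is to dump the extradata of the produced file and compare it with a file QuickTime does play. Below is only a minimal standalone sketch, not part of the transcoder code; the helper name dumpAvcc is made up, and only plain libavformat calls are used:

extern "C" {
#include <libavformat/avformat.h>
}
#include <cstdio>

// Opens the given file and prints the extradata of its first video stream.
// For an mp4/mov file this is the AVCDecoderConfigurationRecord (avcC):
// its first byte should be 1 and it must contain the SPS and PPS.
int dumpAvcc(const char *path) {
    // av_register_all() may still be required on older ffmpeg versions
    AVFormatContext *fmt = nullptr;
    int status = avformat_open_input(&fmt, path, nullptr, nullptr);
    if (status < 0)
        return status;
    if ((status = avformat_find_stream_info(fmt, nullptr)) < 0) {
        avformat_close_input(&fmt);
        return status;
    }
    int videoIndex = av_find_best_stream(fmt, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    if (videoIndex >= 0) {
        AVCodecParameters *par = fmt->streams[videoIndex]->codecpar;
        printf("extradata_size = %d\n", par->extradata_size);
        for (int i = 0; i < par->extradata_size; ++i)
            printf("%02x ", par->extradata[i]);
        printf("\n");
    }
    avformat_close_input(&fmt);
    return videoIndex < 0 ? videoIndex : 0;
}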
Here is the initialization of the output streams and encoders:
int status;

// avformat_alloc_output_context2()
if ((status = formatContext.open(destFilename)) < 0) {
    return status;
}

AVDictionary *fmtOptions = nullptr;
av_dict_set(&fmtOptions, "movflags", "faststart", 0);
av_dict_set(&fmtOptions, "brand", "mp42", 0);

streams.resize(input->getStreamsCount());

for (int i = 0; i < input->getStreamsCount(); ++i) {
    AVStream *inputStream = input->getStreamAtIndex(i);
    CodecContext &decoderContext = input->getDecoderAtIndex(i);

    // retrieve output codec by codec id
    auto encoderCodecId = decoderContext.getCodecID();
    if (decoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO || decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
        int codecIdKey = decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO ? IPROC_KEY_INT(TargetAudioCodecID) : IPROC_KEY_INT(TargetVideoCodecID);
        auto codecIdParam = static_cast<AVCodecID>(params[codecIdKey]);
        if (codecIdParam != AV_CODEC_ID_NONE) {
            encoderCodecId = codecIdParam;
        }
    }

    AVCodec *encoder = nullptr;
    if ((encoder = avcodec_find_encoder(encoderCodecId)) == nullptr) {
        status = AVERROR_ENCODER_NOT_FOUND;
        return status;
    }

    // create stream with specific codec and format
    AVStream *outputStream = nullptr;
    // avformat_new_stream()
    if ((outputStream = formatContext.newStream(encoder)) == nullptr) {
        return AVERROR(ENOMEM);
    }

    CodecContext encoderContext;
    // avcodec_alloc_context3()
    if ((status = encoderContext.init(encoder)) < 0) {
        return status;
    }

    outputStream->disposition = inputStream->disposition;
    encoderContext.getRawCtx()->chroma_sample_location = decoderContext.getRawCtx()->chroma_sample_location;

    if (encoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO) {
        auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
        if (lang) {
            av_dict_set(&outputStream->metadata, "language", lang->value, 0);
        }

        // prepare encoder context
        int targetWidth = params[IPROC_KEY_INT(TargetVideoWidth)];
        int targetHeight = params[IPROC_KEY_INT(TargetVideHeight)];

        encoderContext.width() = targetWidth > 0 ? targetWidth : decoderContext.width();
        encoderContext.height() = targetHeight > 0 ? targetHeight : decoderContext.height();
        encoderContext.pixelFormat() = encoder->pix_fmts ? encoder->pix_fmts[0] : decoderContext.pixelFormat();
        encoderContext.timeBase() = decoderContext.timeBase();
        encoderContext.getRawCtx()->level = 31;
        encoderContext.getRawCtx()->gop_size = 25;

        double far = static_cast<double>(encoderContext.getRawCtx()->width)/encoderContext.getRawCtx()->height;
        double dar = static_cast<double>(decoderContext.width())/decoderContext.height();
        encoderContext.sampleAspectRatio() = av_d2q(dar/far, 255);

        encoderContext.getRawCtx()->bits_per_raw_sample = FFMIN(decoderContext.getRawCtx()->bits_per_raw_sample,
                                                                av_pix_fmt_desc_get(encoderContext.pixelFormat())->comp[0].depth);
        encoderContext.getRawCtx()->framerate = inputStream->r_frame_rate;
        outputStream->avg_frame_rate = encoderContext.getRawCtx()->framerate;

        VideoFilterGraphParameters params;
        params.height = encoderContext.height();
        params.width = encoderContext.width();
        params.pixelFormat = encoderContext.pixelFormat();
        if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
            return status;
        }
    } else if (encoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
        auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
        if (lang) {
            av_dict_set(&outputStream->metadata, "language", lang->value, 0);
        }

        encoderContext.sampleRate() = params[IPROC_KEY_INT(TargetAudioSampleRate)] ? : decoderContext.sampleRate();
        encoderContext.channels() = params[IPROC_KEY_INT(TargetAudioChannels)] ? : decoderContext.channels();
        auto paramChannelLayout = params[IPROC_KEY_INT(TargetAudioChannelLayout)];
        if (paramChannelLayout) {
            encoderContext.channelLayout() = paramChannelLayout;
        } else {
            encoderContext.channelLayout() = av_get_default_channel_layout(encoderContext.channels());
        }

        AVSampleFormat sampleFormatParam = static_cast<AVSampleFormat>(params[IPROC_KEY_INT(TargetAudioSampleFormat)]);
        if (sampleFormatParam != AV_SAMPLE_FMT_NONE) {
            encoderContext.sampleFormat() = sampleFormatParam;
        } else if (encoder->sample_fmts) {
            encoderContext.sampleFormat() = encoder->sample_fmts[0];
        } else {
            encoderContext.sampleFormat() = decoderContext.sampleFormat();
        }

        encoderContext.timeBase().num = 1;
        encoderContext.timeBase().den = encoderContext.sampleRate();

        AudioFilterGraphParameters params;
        params.channelLayout = encoderContext.channelLayout();
        params.channels = encoderContext.channels();
        params.format = encoderContext.sampleFormat();
        params.sampleRate = encoderContext.sampleRate();
        if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
            return status;
        }
    }

    // before using encoder, we should open it and update its parameters
    printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
    AVDictionary *options = nullptr;
    // avcodec_open2()
    if ((status = encoderContext.open(encoder, &options)) < 0) {
        return status;
    }

    if (streams[i].filterGraph) {
        streams[i].filterGraph.setOutputFrameSize(encoderContext.getFrameSize());
    }

    // avcodec_parameters_from_context()
    if ((status = encoderContext.fillParamters(outputStream->codecpar)) < 0) {
        return status;
    }
    outputStream->codecpar->format = encoderContext.getRawCtx()->pix_fmt;

    if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
        encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    if (encoderContext.getRawCtx()->nb_coded_side_data) {
        int i;

        for (i = 0; i < encoderContext.getRawCtx()->nb_coded_side_data; i++) {
            const AVPacketSideData *sd_src = &encoderContext.getRawCtx()->coded_side_data[i];
            uint8_t *dst_data;

            dst_data = av_stream_new_side_data(outputStream, sd_src->type, sd_src->size);
            if (!dst_data)
                return AVERROR(ENOMEM);
            memcpy(dst_data, sd_src->data, sd_src->size);
        }
    }

    /*
     * Add global input side data. For now this is naive, and copies it
     * from the input stream's global side data. All side data should
     * really be funneled over AVFrame and libavfilter, then added back to
     * packet side data, and then potentially using the first packet for
     * global side data.
     */
    for (int i = 0; i < inputStream->nb_side_data; i++) {
        AVPacketSideData *sd = &inputStream->side_data[i];
        uint8_t *dst = av_stream_new_side_data(outputStream, sd->type, sd->size);
        if (!dst)
            return AVERROR(ENOMEM);
        memcpy(dst, sd->data, sd->size);
    }

    // copy timebase while removing common factors
    if (outputStream->time_base.num <= 0 || outputStream->time_base.den <= 0) {
        outputStream->time_base = av_add_q(encoderContext.timeBase(), (AVRational){0, 1});
    }

    // copy estimated duration as a hint to the muxer
    if (outputStream->duration <= 0 && inputStream->duration > 0) {
        outputStream->duration = av_rescale_q(inputStream->duration, inputStream->time_base, outputStream->time_base);
    }

    streams[i].codecType = encoderContext.getRawCtx()->codec_type;
    streams[i].codec = std::move(encoderContext);
    streams[i].streamIndex = i;
}

// avio_open() and avformat_write_header()
if ((status = formatContext.writeHeader(fmtOptions)) < 0) {
    return status;
}

formatContext.dumpFormat();
Reading from the stream:
int InputProcessor::performStep() {
    int status;
    Packet nextPacket;
    if ((status = input->getFormatContext().readFrame(nextPacket)) < 0) {
        return status;
    }
    ++streams[nextPacket.getStreamIndex()].readPackets;
    int streamIndex = nextPacket.getStreamIndex();

    CodecContext &decoder = input->getDecoderAtIndex(streamIndex);
    AVStream *inputStream = input->getStreamAtIndex(streamIndex);
    if (streams[nextPacket.getStreamIndex()].readPackets == 1) {
        for (int i = 0; i < inputStream->nb_side_data; ++i) {
            AVPacketSideData *src_sd = &inputStream->side_data[i];
            uint8_t *dst_data;

            if (src_sd->type == AV_PKT_DATA_DISPLAYMATRIX) {
                continue;
            }
            if (av_packet_get_side_data(nextPacket.getRawPtr(), src_sd->type, nullptr)) {
                continue;
            }
            dst_data = av_packet_new_side_data(nextPacket.getRawPtr(), src_sd->type, src_sd->size);
            if (!dst_data) {
                return AVERROR(ENOMEM);
            }
            memcpy(dst_data, src_sd->data, src_sd->size);
        }
    }

    nextPacket.rescaleTimestamps(inputStream->time_base, decoder.timeBase());

    status = decodePacket(&nextPacket, nextPacket.getStreamIndex());
    if (status < 0 && status != AVERROR(EAGAIN)) {
        return status;
    }
    return 0;
}
Here is the decoding/encoding code:
int InputProcessor::decodePacket(Packet *packet, int streamIndex) {
    int status;
    int sendStatus;

    auto &decoder = input->getDecoderAtIndex(streamIndex);

    do {
        if (packet == nullptr) {
            sendStatus = decoder.flushDecodedFrames();
        } else {
            sendStatus = decoder.sendPacket(*packet);
        }

        if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
            return sendStatus;
        }
        if (sendStatus == 0 && packet) {
            ++streams[streamIndex].decodedPackets;
        }

        Frame decodedFrame;
        while (true) {
            if ((status = decoder.receiveFrame(decodedFrame)) < 0) {
                break;
            }
            ++streams[streamIndex].decodedFrames;
            if ((status = filterAndWriteFrame(&decodedFrame, streamIndex)) < 0) {
                break;
            }
            decodedFrame.unref();
        }
    } while (sendStatus == AVERROR(EAGAIN));

    return status;
}

int InputProcessor::encodeAndWriteFrame(Frame *frame, int streamIndex) {
    assert(input->isValid());
    assert(formatContext);

    int status = 0;
    int sendStatus;

    Packet packet;

    CodecContext &encoderContext = streams[streamIndex].codec;

    do {
        if (frame) {
            sendStatus = encoderContext.sendFrame(*frame);
        } else {
            sendStatus = encoderContext.flushEncodedPackets();
        }

        if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
            return status;
        }
        if (sendStatus == 0 && frame) {
            ++streams[streamIndex].encodedFrames;
        }

        while (true) {
            if ((status = encoderContext.receivePacket(packet)) < 0) {
                break;
            }
            ++streams[streamIndex].encodedPackets;

            packet.setStreamIndex(streamIndex);
            auto sourceTimebase = encoderContext.timeBase();
            auto dstTimebase = formatContext.getStreams()[streamIndex]->time_base;
            packet.rescaleTimestamps(sourceTimebase, dstTimebase);

            if ((status = formatContext.writeFrameInterleaved(packet)) < 0) {
                return status;
            }
            packet.unref();
        }
    } while (sendStatus == AVERROR(EAGAIN));

    if (status != AVERROR(EAGAIN)) {
        return status;
    }
    return 0;
}
FFprobe output for the original video:
Input #0, matroska,webm, from 'testvideo':
  Metadata:
    title           : TestVideo
    encoder         : libebml v1.3.0 + libmatroska v1.4.0
    creation_time   : 2014-12-23T03:38:05.000000Z
  Duration: 00:02:29.25, start: 0.000000, bitrate: 79549 kb/s
    Stream #0:0(rus): Video: h264 (High 4:4:4 Predictive), yuv444p10le(pc, bt709, progressive), 2048x858 [SAR 1:1 DAR 1024:429], 24 fps, 24 tbr, 1k tbn, 48 tbc (default)
    Stream #0:1(rus): Audio: pcm_s24le, 48000 Hz, 6 channels, s32 (24 bit), 6912 kb/s (default)
Transcoded:
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '123.mp4':
  Metadata:
    major_brand     : mp42
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf57.71.100
  Duration: 00:02:29.27, start: 0.000000, bitrate: 4282 kb/s
    Stream #0:0(rus): Video: h264 (High) (avc1/0x31637661), yuv420p, 1280x720 [SAR 192:143 DAR 1024:429], 3940 kb/s, 24.01 fps, 24 tbr, 12288 tbn, 96 tbc (default)
    Metadata:
      handler_name    : VideoHandler
    Stream #0:1(rus): Audio: aac (LC) (mp4a/0x6134706D), 48000 Hz, 5.1, fltp, 336 kb/s (default)
    Metadata:
      handler_name    : SoundHandler