
Why do I get clicks and blips when I encode a sound file using Java JNA?

I implemented a hello-world libavcodec program using JNA to generate a wav file containing a pure 440 Hz sine wave. But when I actually run the program, the wav file contains annoying clicks and blips (compared to the pure sine wav created by the C program). How should I be calling avcodec_encode_audio2?

Here is my Java code. All the sources are also on GitHub.

import java.io.IOException; 
import java.nio.ByteBuffer; 
import java.nio.ByteOrder; 
import java.nio.IntBuffer; 
import java.util.Objects; 

import javax.sound.sampled.AudioFormat; 
import javax.sound.sampled.AudioSystem; 
import javax.sound.sampled.DataLine; 
import javax.sound.sampled.LineUnavailableException; 
import javax.sound.sampled.TargetDataLine; 


public class Sin { 
    /** 
    * Abstract class that allows you to put the initialization and cleanup 
    * code at the same place instead of separated by the big try block. 
    */ 
    public static abstract class SharedPtr<T> implements AutoCloseable { 
     public T ptr; 
     public SharedPtr(T ptr) { 
      this.ptr = ptr; 
     } 
     /** 
     * Abstract override forces method to throw no checked exceptions. 
     * Subclasses will call a C function that throws no exceptions. 
     */ 
     @Override public abstract void close(); 
    } 

    /** 
    * @param args 
    * @throws IOException 
    * @throws LineUnavailableException 
    */ 
    public static void main(String[] args) throws IOException, LineUnavailableException { 
     final AvcodecLibrary avcodec = AvcodecLibrary.INSTANCE; 
     final AvformatLibrary avformat = AvformatLibrary.INSTANCE; 
     final AvutilLibrary avutil = AvutilLibrary.INSTANCE; 
     avcodec.avcodec_register_all(); 
     avformat.av_register_all(); 
     AVOutputFormat.ByReference format = null; 
     String format_name = "wav", file_url = "file:sinjava.wav"; 
     for (AVOutputFormat.ByReference formatIter = avformat.av_oformat_next(null); formatIter != null; formatIter = avformat.av_oformat_next(formatIter)) { 
      formatIter.setAutoWrite(false); 
      String iterName = formatIter.name; 
      if (format_name.equals(iterName)) { 
       format = formatIter; 
       break; 
      } 
     } 
     Objects.requireNonNull(format); 
     System.out.format("Found format %s%n", format_name); 
     AVCodec codec = avcodec.avcodec_find_encoder(format.audio_codec); // one of AvcodecLibrary.CodecID 
     Objects.requireNonNull(codec); 
     codec.setAutoWrite(false); 
     try (
      SharedPtr<AVFormatContext> fmtCtxPtr = new SharedPtr<AVFormatContext>(avformat.avformat_alloc_context()) {@Override public void close(){if (null!=ptr) avformat.avformat_free_context(ptr);}}; 
      ) { 
      AVFormatContext fmtCtx = Objects.requireNonNull(fmtCtxPtr.ptr); 
      fmtCtx.setAutoWrite(false); 
      fmtCtx.setAutoRead(false); 
      fmtCtx.oformat = format; fmtCtx.writeField("oformat"); 

      AVStream st = avformat.avformat_new_stream(fmtCtx, codec); 
      if (null == st) 
       throw new IllegalStateException(); 
      AVCodecContext c = st.codec; 
      if (null == c) 
       throw new IllegalStateException(); 
      st.setAutoWrite(false); 
      fmtCtx.readField("nb_streams"); 
      st.id = fmtCtx.nb_streams - 1; st.writeField("id"); 
      assert st.id >= 0; 
      System.out.format("New stream: id=%d%n", st.id); 

      if (0 != (format.flags & AvformatLibrary.AVFMT_GLOBALHEADER)) { 
       c.flags |= AvcodecLibrary.CODEC_FLAG_GLOBAL_HEADER; 
      } 
      c.writeField("flags"); 

      c.bit_rate = 64000; c.writeField("bit_rate"); 
      int bestSampleRate; 
      if (null == codec.supported_samplerates) { 
       bestSampleRate = 44100; 
      } else { 
       bestSampleRate = 0; 
       for (int offset = 0, sample_rate = codec.supported_samplerates.getInt(0); sample_rate != 0; sample_rate = codec.supported_samplerates.getInt(4 * ++offset)) { // getInt takes a byte offset, hence 4 * offset 
        bestSampleRate = Math.max(bestSampleRate, sample_rate); 
       } 
       assert bestSampleRate > 0; 
      } 
      c.sample_rate = bestSampleRate; c.writeField("sample_rate"); 
      c.channel_layout = AvutilLibrary.AV_CH_LAYOUT_STEREO; c.writeField("channel_layout"); 
      c.channels = avutil.av_get_channel_layout_nb_channels(c.channel_layout); c.writeField("channels"); 
      assert 2 == c.channels; 
      c.sample_fmt = AvutilLibrary.AVSampleFormat.AV_SAMPLE_FMT_S16; c.writeField("sample_fmt"); 
      c.time_base.num = 1; 
      c.time_base.den = bestSampleRate; 
      c.writeField("time_base"); 
      c.setAutoWrite(false); 

      AudioFormat javaSoundFormat = new AudioFormat(bestSampleRate, Short.SIZE, c.channels, true, ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); 
      DataLine.Info javaDataLineInfo = new DataLine.Info(TargetDataLine.class, javaSoundFormat); 
      if (! AudioSystem.isLineSupported(javaDataLineInfo)) 
       throw new IllegalStateException(); 
      int err; 
      if ((err = avcodec.avcodec_open(c, codec)) < 0) { 
       throw new IllegalStateException(); 
      } 
      assert c.channels != 0; 

      AVIOContext.ByReference[] ioCtxReference = new AVIOContext.ByReference[1]; 
      if (0 != (err = avformat.avio_open(ioCtxReference, file_url, AvformatLibrary.AVIO_FLAG_WRITE))) { 
       throw new IllegalStateException("averror " + err); 
      } 
      try (
       SharedPtr<AVIOContext.ByReference> ioCtxPtr = new SharedPtr<AVIOContext.ByReference>(ioCtxReference[0]) {@Override public void close(){if (null!=ptr) avutil.av_free(ptr.getPointer());}} 
       ) { 
       AVIOContext.ByReference ioCtx = Objects.requireNonNull(ioCtxPtr.ptr); 
       fmtCtx.pb = ioCtx; fmtCtx.writeField("pb"); 
       int averr = avformat.avformat_write_header(fmtCtx, null); 
       if (averr < 0) { 
        throw new IllegalStateException("" + averr); 
       } 
       st.read(); // it is modified by avformat_write_header 
       System.out.format("Wrote header. fmtCtx->nb_streams=%d, st->time_base=%d/%d; st->avg_frame_rate=%d/%d%n", fmtCtx.nb_streams, st.time_base.num, st.time_base.den, st.avg_frame_rate.num, st.avg_frame_rate.den); 
       avformat.avio_flush(ioCtx); 
       int frame_size = c.frame_size != 0 ? c.frame_size : 4096; 
       int expectedBufferSize = frame_size * c.channels * (Short.SIZE/8); 
       boolean supports_small_last_frame = c.frame_size == 0 ? true : 0 != (codec.capabilities & AvcodecLibrary.CODEC_CAP_SMALL_LAST_FRAME); 
       int bufferSize = avutil.av_samples_get_buffer_size((IntBuffer)null, c.channels, frame_size, c.sample_fmt, 1); 
       assert bufferSize == expectedBufferSize: String.format("expected %d; got %d", expectedBufferSize, bufferSize); 
       ByteBuffer samples = ByteBuffer.allocate(expectedBufferSize); 
       samples.order(ByteOrder.nativeOrder()); 
       int audio_time = 0; // unit: (c.time_base) s = (1/c.sample_rate) s 
       int audio_sample_count = supports_small_last_frame ? 
        3 * c.sample_rate : 
        3 * c.sample_rate/frame_size * frame_size; 
       while (audio_time < audio_sample_count) { 
        int frame_audio_time = audio_time; 
        samples.clear(); 
        int nb_samples_in_frame = 0; 
        // encode a single tone sound 
        for (; samples.hasRemaining() && audio_time < audio_sample_count; nb_samples_in_frame++, audio_time++) { 
         double x = 2*Math.PI*440/c.sample_rate * audio_time; 
         double y = 10000 * Math.sin(x); 
         samples.putShort((short) y); 
         samples.putShort((short) y); 
        } 
        samples.flip(); 
        try (
          SharedPtr<AVFrame> framePtr = new SharedPtr<AVFrame>(avcodec.avcodec_alloc_frame()) {@Override public void close() {if (null!=ptr) avutil.av_free(ptr.getPointer());}}; 
          ) { 
         AVFrame frame = Objects.requireNonNull(framePtr.ptr); 
         frame.setAutoRead(false); // will be an in param 
         frame.setAutoWrite(false); 
         frame.nb_samples = nb_samples_in_frame; frame.writeField("nb_samples"); // actually unused during encoding 
         // Presentation time, in AVStream.time_base units. 
         frame.pts = avutil.av_rescale_q(frame_audio_time, c.time_base, st.time_base); // i * codec_time_base/st_time_base 
         frame.writeField("pts"); 

         assert c.channels > 0; 
         int bytesPerSample = avutil.av_get_bytes_per_sample(c.sample_fmt); 
         assert bytesPerSample > 0; 
         if (0 != (err = avcodec.avcodec_fill_audio_frame(frame, c.channels, c.sample_fmt, samples, samples.capacity(), 1))) { 
          throw new IllegalStateException(""+err); 
         } 
         AVPacket packet = new AVPacket(); // one of the few structs from ffmpeg with guaranteed size 
         avcodec.av_init_packet(packet); 
         packet.size = 0; 
         packet.data = null; 
         packet.stream_index = st.index; packet.writeField("stream_index"); 
         // encode the samples 
         IntBuffer gotPacket = IntBuffer.allocate(1); 
         if (0 != (err = avcodec.avcodec_encode_audio2(c, packet, frame, gotPacket))) { 
          throw new IllegalStateException("" + err); 
         } else if (0 != gotPacket.get()) { 
          packet.read(); 
          averr = avformat.av_write_frame(fmtCtx, packet); 
          if (averr < 0) 
           throw new IllegalStateException("" + averr); 
         } 
         System.out.format("encoded frame: codec time = %d; pts=%d = av_rescale_q(%d,%d/%d,%d/%d) (%.02fs) contains %d samples (%.02fs); got_packet=%d; packet.size=%d%n", 
           frame_audio_time, 
           frame.pts, 
           frame_audio_time, st.codec.time_base.num,st.codec.time_base.den,st.time_base.num,st.time_base.den, 
           1.*frame_audio_time/c.sample_rate, frame.nb_samples, 1.*frame.nb_samples/c.sample_rate, gotPacket.array()[0], packet.size); 
        } 
       } 
       if (0 != (err = avformat.av_write_trailer(fmtCtx))) { 
        throw new IllegalStateException(); 
       } 
       avformat.avio_flush(ioCtx); 
      } 
     } 
     System.out.println("Done writing"); 
    } 
} 

I also rewrote it in C, and the C version works fine with no problems. But I cannot see how the two use the library differently; all the library function calls should be the same!

//! gcc --std=c99 sin.c $(pkg-config --cflags --libs libavutil libavformat libavcodec) -o sin 
// sudo apt-get install libswscale-dev 
#include <stdlib.h> 
#include <stdio.h> 
#include <string.h> 
#include <math.h> 

#include <libavutil/opt.h> 
#include <libavutil/mathematics.h> 
#include <libavformat/avformat.h> 
#include <libswscale/swscale.h> 
#include <libavcodec/avcodec.h> 
int main(int argc, char *argv[]) { 
    const char *format_name = "wav", *file_url = "file:sin.wav"; 
    avcodec_register_all(); 
    av_register_all(); 
    AVOutputFormat *format = NULL; 
    for (AVOutputFormat *formatIter = av_oformat_next(NULL); formatIter != NULL; formatIter = av_oformat_next(formatIter)) { 
    int hasEncoder = NULL != avcodec_find_encoder(formatIter->audio_codec); 
    if (0 == strcmp(format_name, formatIter->name)) { 
     format = formatIter; 
     break; 
    } 
    } 
    printf("Found format %s\n", format->name); 
    AVCodec *codec = avcodec_find_encoder(format->audio_codec); 
    if (! codec) { 
    fprintf(stderr, "Could not find codec %d\n", format->audio_codec); 
    exit(1); 
    } 
    AVFormatContext *fmtCtx = avformat_alloc_context(); 
    if (! fmtCtx) { 
    fprintf(stderr, "error allocating AVFormatContext\n"); 
    exit(1); 
    } 
    fmtCtx->oformat = format; 
    AVStream *st = avformat_new_stream(fmtCtx, codec); 
    if (! st) { 
    fprintf(stderr, "error allocating AVStream\n"); 
    exit(1); 
    } 
    if (fmtCtx->nb_streams != 1) { 
    fprintf(stderr, "avformat_new_stream should have incremented nb_streams, but it's still %d\n", fmtCtx->nb_streams); 
    exit(1); 
    } 
    AVCodecContext *c = st->codec; 
    if (! c) { 
    fprintf(stderr, "avformat_new_stream should have allocated a AVCodecContext for my stream\n"); 
    exit(1); 
    } 
    st->id = fmtCtx->nb_streams - 1; 
    printf("Created stream %d\n", st->id); 
    if (0 != (format->flags & AVFMT_GLOBALHEADER)) { 
    c->flags |= CODEC_FLAG_GLOBAL_HEADER; 
    } 
    c->bit_rate = 64000; 
    int bestSampleRate; 
    if (NULL == codec->supported_samplerates) { 
    bestSampleRate = 44100; 
    printf("Setting sample rate: %d\n", bestSampleRate); 
    } else { 
    bestSampleRate = 0; 
    for (const int *sample_rate_iter = codec->supported_samplerates; *sample_rate_iter != 0; sample_rate_iter++) { 
     if (*sample_rate_iter >= bestSampleRate) 
     bestSampleRate = *sample_rate_iter; 
    } 
    printf("Using best supported sample rate: %d\n", bestSampleRate); 
    } 
    c->sample_rate = bestSampleRate; 
    c->channel_layout = AV_CH_LAYOUT_STEREO; 
    c->channels = av_get_channel_layout_nb_channels(c->channel_layout); 
    c->time_base.num = 1; 
    c->time_base.den = c->sample_rate; 
    if (c->channels != 2) { 
    fprintf(stderr, "av_get_channel_layout_nb_channels returned %d instead of 2\n", c->channels); 
    exit(1); 
    } 
    c->sample_fmt = AV_SAMPLE_FMT_S16; 
    int averr; 
    if ((averr = avcodec_open2(c, codec, NULL)) < 0) { 
    fprintf(stderr, "avcodec_open2 returned error %d\n", averr); 
    exit(1); 
    } 
    AVIOContext *ioCtx = NULL; 
    if (0 != (averr = avio_open(&ioCtx, file_url, AVIO_FLAG_WRITE))) { 
    fprintf(stderr, "avio_open returned error %d\n", averr); 
    exit(1); 
    } 
    if (ioCtx == NULL) { 
    fprintf(stderr, "AVIOContext should have been set by avio_open\n"); 
    exit(1); 
    } 
    fmtCtx->pb = ioCtx; 
    if (0 != (averr = avformat_write_header(fmtCtx, NULL))) { 
    fprintf(stderr, "avformat_write_header returned error %d\n", averr); 
    exit(1); 
    } 
    printf("Wrote header. fmtCtx->nb_streams=%d, st->time_base=%d/%d; st->avg_frame_rate=%d/%d\n", fmtCtx->nb_streams, st->time_base.num, st->time_base.den, st->avg_frame_rate.num, st->avg_frame_rate.den); 
    int align = 1; 
    int sample_size = av_get_bytes_per_sample(c->sample_fmt); 
    if (sample_size != sizeof(int16_t)) { 
    fprintf(stderr, "expected sample size=%zu but got %d\n", sizeof(int16_t), sample_size); 
    exit(1); 
    } 
    int frame_size = c->frame_size != 0 ? c->frame_size : 4096; 
    int bufferSize = av_samples_get_buffer_size(NULL, c->channels, frame_size, c->sample_fmt, align); 
    int expectedBufferSize = frame_size * c->channels * sample_size; 
    int supports_small_last_frame = c->frame_size == 0 ? 1 : 0 != (codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME); 
    if (bufferSize != expectedBufferSize) { 
    fprintf(stderr, "expected buffer size=%d but got %d\n", expectedBufferSize, bufferSize); 
    exit(1); 
    } 
    int16_t *samples = (int16_t*)malloc(bufferSize); 

    uint32_t audio_time = 0; // unit: (1/c->sample_rate) s 
    uint32_t audio_sample_count = supports_small_last_frame ? 
    3 * c->sample_rate : 
    3 * c->sample_rate/frame_size * frame_size; 
    while (audio_time < audio_sample_count) { 
    uint32_t frame_audio_time = audio_time; // unit: (1/c->sample_rate) s 
    AVFrame *frame = avcodec_alloc_frame(); 
    if (frame == NULL) { 
     fprintf(stderr, "avcodec_alloc_frame failed\n"); 
     exit(1); 
    } 
    for (uint32_t i = 0; i != frame_size && audio_time < audio_sample_count; i++, audio_time++) { 
     samples[2*i] = samples[2*i + 1] = 10000 * sin(2*M_PI*440/c->sample_rate * audio_time); 
     frame->nb_samples = i+1; // actually unused during encoding 
    } 
    // frame->format = c->sample_fmt; // unused during encoding 
    frame->pts = av_rescale_q(frame_audio_time, c->time_base, st->time_base); 
    if (0 != (averr = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt, (const uint8_t*)samples, bufferSize, align))) { 
     fprintf(stderr, "avcodec_fill_audio_frame returned error %d\n", averr); 
     exit(1); 
    } 
    AVPacket packet; 
    av_init_packet(&packet); 
    packet.data = NULL; 
    packet.size = 0; 
    int got_packet; 
    if (0 != (averr = avcodec_encode_audio2(c, &packet, frame, &got_packet))) { 
     fprintf(stderr, "avcodec_encode_audio2 returned error %d\n", averr); 
     exit(1); 
    } 
    if (got_packet) { 
     packet.stream_index = st->index; 
     if (0 > (averr = av_write_frame(fmtCtx, &packet))) { 
     fprintf(stderr, "av_write_frame returned error %d\n", averr); 
     exit(1); 
     } else if (averr == 1) { 
     // end of stream wanted. 
     } 
    } 
    printf("encoded frame: codec time = %u; format pts=%ld = av_rescale_q(%u,%d/%d,%d/%d) (%.02fs) contains %d samples (%.02fs); got_packet=%d; packet.size=%d\n", 
     frame_audio_time, 
     frame->pts, 
     frame_audio_time, c->time_base.num, c->time_base.den, st->time_base.num, st->time_base.den, 
     1.*frame_audio_time/c->sample_rate, frame->nb_samples, 1.*frame->nb_samples/c->sample_rate, got_packet, packet.size); 
    av_free(frame); 
    } 
    free(samples); 
    cleanupFile: 
    if (0 != (averr = av_write_trailer(fmtCtx))) { 
    fprintf(stderr, "av_write_trailer returned error %d\n", averr); 
    exit(1); 
    } 

    avio_flush(ioCtx); 
    avio_close(ioCtx); 
    avformat_free_context(fmtCtx); 
} 

Answer


The problem was that ByteBuffer.allocate(int) creates a buffer whose address is not stable across JNA function calls. Every time you call a native function, JNA copies the bytes into a temporary native array that exists only for the duration of that call. In contrast, ByteBuffer.allocateDirect(int) creates a buffer with a stable native pointer. This is apparently a well-known pitfall of using ByteBuffer in JNA, but I failed to notice it in the fine print of Using Pointers and Arrays.
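Here is a minimal sketch of the difference (the class name is mine; Native.getDirectBufferPointer is the standard JNA helper for inspecting a direct buffer's native address):

import java.nio.ByteBuffer; 

import com.sun.jna.Native; 
import com.sun.jna.Pointer; 

public class BufferPitfall { 
    public static void main(String[] args) { 
     // Heap-backed buffer: JNA copies its contents into a temporary 
     // native array on every call, so a callee that retains the 
     // pointer (as avcodec_fill_audio_frame does via frame->data[0]) 
     // is left pointing at memory that is freed after the call. 
     ByteBuffer heap = ByteBuffer.allocate(4096); 

     // Direct buffer: backed by native memory at one fixed address, 
     // safe to pass to a function that keeps the pointer around. 
     ByteBuffer direct = ByteBuffer.allocateDirect(4096); 
     Pointer p = Native.getDirectBufferPointer(direct); 
     System.out.println("heap.isDirect()=" + heap.isDirect() + ", direct.isDirect()=" + direct.isDirect()); 
     System.out.println("stable native address: " + p); 
    } 
} 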

So the fix was to change the sample allocation to ByteBuffer samples = ByteBuffer.allocateDirect(expectedBufferSize);. This matters because the subsequent avcodec_fill_audio_frame call does not copy samples: it merely points frame->data[0] at the buffer's uint8_t* address, so the samples buffer must keep a stable address until the encoder consumes it.
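In context, only the allocation line changes (variable names taken from the code above; the surrounding calls are unchanged):

// Before (broken): heap buffer; its native address is valid only per-call 
// ByteBuffer samples = ByteBuffer.allocate(expectedBufferSize); 

// After (fixed): direct buffer with a stable native address 
ByteBuffer samples = ByteBuffer.allocateDirect(expectedBufferSize); 
samples.order(ByteOrder.nativeOrder()); // still required: S16 samples must match native byte order 

// avcodec_fill_audio_frame only records the address in frame->data[0]; 
// the bytes are read later by avcodec_encode_audio2, so the address 
// must stay valid between the two calls. 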