视频解码相关概念

首先简单介绍一下视频文件的相关知识。我们平时看到的视频文件有许多格式,比如 avi、mkv、rmvb、mov、mp4 等等,这些被称为容器(Container)。不同的容器格式规定了其中音视频数据的组织方式(也包括其他数据,比如字幕等)。容器中一般会封装有视频轨和音频轨,也称为视频流(stream)和音频流。播放视频文件的第一步就是根据视频文件的格式,解析(demux)出其中封装的视频流、音频流以及字幕(如果有的话),解析出的数据被读到包(packet)中,每个包里保存的是视频帧(frame)或音频帧。然后分别对视频帧和音频帧调用相应的解码器(decoder)进行解码,比如对于使用 H.264 编码的视频和 MP3 编码的音频,会相应地调用 H.264 解码器和 MP3 解码器。解码之后得到的就是原始的图像(YUV 或 RGB)和声音(PCM)数据,然后根据同步好的时间将图像显示到屏幕上,将声音输出到声卡,最终就是我们看到的视频。

FFmpeg 的 API 就是根据这个过程设计的,因此使用 FFmpeg 来处理视频文件的方法非常直观简单。下面就一步一步介绍从视频文件中解码出图片的过程。

数据结构介绍

1. AVFormatContext:保存需要读入的文件的格式信息,比如流的个数以及流数据等
2. AVCodecContext:保存了相应流的详细编码信息,比如视频的宽、高,编码类型等。
3. AVCodec(示例代码中的变量 pCodec):真正的编解码器,其中有编解码需要调用的函数
4. AVFrame:用于保存数据帧的数据结构,示例代码中的 pFrame 和 pFrameYUV 分别保存颜色转换前、后的图像
5. AVPacket:解析文件时会将音/视频帧读入到packet中

ffmpeg 视频解码播放

iOS 示例代码

/// Decodes the first video stream of the file at `url` and dumps every decoded
/// frame as raw planar YUV 4:2:0 into `resource/<file name>.yuv` under the main
/// bundle's resource directory, printing per-frame progress and a final summary
/// to stdout.
///
/// Every FFmpeg object, the conversion buffer and the output file are released
/// on all exit paths, including failures.
///
/// NOTE(review): the output location is kept exactly as in the original code.
/// The application bundle is normally read-only on an iOS device, so `fopen`
/// is expected to fail there — confirm whether the file should be written to a
/// writable directory instead.
///
/// @param url Location of the media file. Only `[url path]` is handed to
///            FFmpeg, so this is expected to be a local file URL.
- (void) decodeVideo:(NSURL*)url {
    // Everything is declared and zero-initialised up front: each failure path
    // jumps to the single `cleanup` label, and no `goto` may skip over the
    // initialisation of a variable (ARC rejects that for object pointers).
    AVFormatContext   *pFormatCtx      = NULL;
    AVCodecContext    *pCodecCtx       = NULL;
    const AVCodec     *pCodec          = NULL;
    AVFrame           *pFrame          = NULL;   // frame as produced by the decoder
    AVFrame           *pFrameYUV       = NULL;   // same frame converted to YUV420P
    AVPacket          *packet          = NULL;
    uint8_t           *out_buffer      = NULL;   // backing store for pFrameYUV
    struct SwsContext *img_convert_ctx = NULL;
    FILE              *fp_yuv          = NULL;
    unsigned int       i               = 0;
    int                videoindex      = -1;
    int                buffer_size     = 0;
    int                frame_cnt       = 0;
    int                ret             = 0;
    size_t             y_size          = 0;
    size_t             chroma_size     = 0;
    BOOL               draining        = NO;
    clock_t            time_start      = 0;
    clock_t            time_finish     = 0;
    double             time_duration   = 0.0;
    char               err[512]        = {0};

    NSString *input_nsstr  = [url path];
    NSString *output_str   = [NSString stringWithFormat:@"resource/%@.yuv", [url lastPathComponent]];
    NSString *output_nsstr = [[[NSBundle mainBundle] resourcePath] stringByAppendingPathComponent:output_str];
    // A growable string replaces the fixed 1 KiB buffer that was extended with
    // sprintf(info, "%s...", info, ...), which is undefined behaviour because
    // source and destination overlap.
    NSMutableString *info  = [NSMutableString string];

    // The NSString locals above keep these C strings alive for the whole method,
    // so no fixed-size copies (which long paths could overflow) are needed.
    const char *input_path  = [input_nsstr UTF8String];
    const char *output_path = [output_nsstr UTF8String];

    if (input_path == NULL || output_path == NULL) {
        printf("Invalid input URL or output path.\n");
        return;
    }

    printf("Input Path: %s\n", input_path);
    printf("Output Path:%s\n", output_path);

    // 1. Open the container. With a NULL context FFmpeg allocates one itself and
    //    leaves the pointer NULL on failure.
    ret = avformat_open_input(&pFormatCtx, input_path, NULL, NULL);
    if (ret != 0) {
        av_strerror(ret, err, sizeof(err));
        printf("Couldn't open input stream. error: %s\n", err);
        goto cleanup;
    }

    // 2. Read stream and bitrate information.
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
        printf("Couldn't find stream information.\n");
        goto cleanup;
    }

    // 3. Pick the first video stream.
    for (i = 0; i < pFormatCtx->nb_streams; i++) {
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            videoindex = (int)i;
            break;
        }
    }
    if (videoindex == -1) {
        printf("Couldn't find a video stream.\n");
        goto cleanup;
    }

    // 4. Set up the decoder for that stream.
    pCodec = avcodec_find_decoder(pFormatCtx->streams[videoindex]->codecpar->codec_id);
    if (pCodec == NULL) {
        printf("Couldn't find Codec.\n");
        goto cleanup;
    }
    pCodecCtx = avcodec_alloc_context3(pCodec);
    if (pCodecCtx == NULL) {
        printf("Couldn't allocate codec context.\n");
        goto cleanup;
    }
    ret = avcodec_parameters_to_context(pCodecCtx, pFormatCtx->streams[videoindex]->codecpar);
    if (ret < 0) {
        av_strerror(ret, err, sizeof(err));
        printf("Couldn't copy codec parameters. error: %s\n", err);
        goto cleanup;
    }
    if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
        printf("Couldn't open codec.\n");
        goto cleanup;
    }

    // 5. Allocate the frames, the packet and the tightly packed (alignment 1)
    //    YUV420P buffer the converted picture is written into.
    buffer_size = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height, 1);
    if (buffer_size < 0) {
        av_strerror(buffer_size, err, sizeof(err));
        printf("Invalid video dimensions. error: %s\n", err);
        goto cleanup;
    }
    pFrame     = av_frame_alloc();
    pFrameYUV  = av_frame_alloc();
    packet     = av_packet_alloc();
    out_buffer = (uint8_t *)av_malloc((size_t)buffer_size);
    if (pFrame == NULL || pFrameYUV == NULL || packet == NULL || out_buffer == NULL) {
        printf("Couldn't allocate decoding buffers.\n");
        goto cleanup;
    }
    av_image_fill_arrays(pFrameYUV->data,
                         pFrameYUV->linesize,
                         out_buffer,
                         AV_PIX_FMT_YUV420P,
                         pCodecCtx->width,
                         pCodecCtx->height,
                         1);

    img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
                                     pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_YUV420P,
                                     SWS_BICUBIC, NULL, NULL, NULL);
    if (img_convert_ctx == NULL) {
        printf("Couldn't create the pixel format converter.\n");
        goto cleanup;
    }

    // Plane sizes come from the line sizes FFmpeg computed for the packed
    // buffer, so odd widths/heights (chroma dimensions round up) are written
    // completely instead of being truncated by width*height/4.
    y_size      = (size_t)pFrameYUV->linesize[0] * (size_t)pCodecCtx->height;
    chroma_size = (size_t)pFrameYUV->linesize[1] * (size_t)((pCodecCtx->height + 1) / 2);

    [info appendFormat:@"[Input     ] %@\n", [url absoluteString]];
    [info appendFormat:@"[Output    ] %@\n", output_str];
    [info appendFormat:@"[Format    ] %s\n", pFormatCtx->iformat->name];
    [info appendFormat:@"[Codec     ] %s\n", pCodec->name];
    [info appendFormat:@"[Resolution] %dx%d\n", pCodecCtx->width, pCodecCtx->height];

    fp_yuv = fopen(output_path, "wb+");
    if (fp_yuv == NULL) {
        printf("Cannot open output file.\n");
        goto cleanup;
    }

    time_start = clock();

    // 6. Demux / decode loop. When the input is exhausted a NULL packet is sent
    //    once so the decoder hands out the frames it is still buffering.
    while (!draining) {
        if (av_read_frame(pFormatCtx, packet) < 0) {
            draining = YES;
            ret = avcodec_send_packet(pCodecCtx, NULL);
        } else {
            if (packet->stream_index != videoindex) {
                av_packet_unref(packet);
                continue;
            }
            ret = avcodec_send_packet(pCodecCtx, packet);
            // The decoder keeps its own reference; drop ours so the payload is
            // not leaked on every iteration.
            av_packet_unref(packet);
        }
        if (ret < 0) {
            // Best effort, as before: skip this packet and keep going.
            av_strerror(ret, err, sizeof(err));
            printf("Couldn't send packet to decoder. error: %s\n", err);
            continue;
        }

        while (ret >= 0) {
            const char *pictype_str = "Other";

            ret = avcodec_receive_frame(pCodecCtx, pFrame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                printf("Decode ret value: %d\n", ret);
                break;
            }
            if (ret < 0) {
                av_strerror(ret, err, sizeof(err));
                printf("Decode Error. error: %s\n", err);
                printf("info: %s", [info UTF8String]);
                goto cleanup;
            }

            sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize,
                      0, pCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize);

            if (fwrite(pFrameYUV->data[0], 1, y_size, fp_yuv) != y_size ||            // Y
                fwrite(pFrameYUV->data[1], 1, chroma_size, fp_yuv) != chroma_size ||  // U
                fwrite(pFrameYUV->data[2], 1, chroma_size, fp_yuv) != chroma_size) {  // V
                printf("Couldn't write frame to output file.\n");
                goto cleanup;
            }

            switch (pFrame->pict_type) {
                case AV_PICTURE_TYPE_I: pictype_str = "I"; break;
                case AV_PICTURE_TYPE_P: pictype_str = "P"; break;
                case AV_PICTURE_TYPE_B: pictype_str = "B"; break;
                default: break;
            }
            printf("Frame Index: %5d. Type:%s\n", frame_cnt, pictype_str);
            frame_cnt++;
        }
    }

    time_finish = clock();
    // clock() counts processor time in CLOCKS_PER_SEC ticks; scale explicitly so
    // the value really is in the microseconds the label below advertises.
    time_duration = (double)(time_finish - time_start) * 1000000.0 / (double)CLOCKS_PER_SEC;

    [info appendFormat:@"[duration  ] %@\n", [self calcVideoDuration:pFormatCtx->duration]];
    [info appendFormat:@"[decodeTime] %fus\n", time_duration];
    [info appendFormat:@"[Count     ] %d\n", frame_cnt];

    printf("%s", [info UTF8String]);

cleanup:
    // Each call below accepts NULL (or a pointer to NULL), so this is safe no
    // matter how far the setup got.
    if (fp_yuv != NULL) {
        fclose(fp_yuv);
    }
    sws_freeContext(img_convert_ctx);
    av_packet_free(&packet);
    av_frame_free(&pFrameYUV);
    av_frame_free(&pFrame);
    av_free(out_buffer);
    avcodec_free_context(&pCodecCtx);
    avformat_close_input(&pFormatCtx);
}

/// Formats a container duration as `HH:MM:SS.cc` (hours, minutes, seconds,
/// hundredths of a second).
///
/// @param duration Duration expressed in `AV_TIME_BASE` units, or
///                 `AV_NOPTS_VALUE` when the duration is unknown.
/// @return The formatted duration, or the placeholder `0:0:0` when `duration`
///         is `AV_NOPTS_VALUE`.
- (NSString*) calcVideoDuration:(int64_t)duration {
    // Unknown duration: nothing to split, hand back the placeholder.
    if (duration == AV_NOPTS_VALUE) {
        return @"0:0:0";
    }

    // Bias the value by 5000 units before the truncating divisions below, so
    // the hundredths field is rounded to nearest rather than cut off.
    const int64_t biased       = duration + 5000;
    const int64_t totalSeconds = biased / AV_TIME_BASE;
    const int64_t remainder    = biased % AV_TIME_BASE;
    const int64_t totalMinutes = totalSeconds / 60;

    const int64_t hourPart       = totalMinutes / 60;
    const int64_t minutePart     = totalMinutes % 60;
    const int64_t secondPart     = totalSeconds % 60;
    const int64_t hundredthsPart = (100 * remainder) / AV_TIME_BASE;

    return [[NSString alloc] initWithFormat:@"%02lld:%02lld:%02lld.%02lld",
            hourPart,
            minutePart,
            secondPart,
            hundredthsPart];
}
right