@@ -666,6 +666,7 @@ static void free_tables(H264Context *h){
666666 av_freep (& h -> non_zero_count );
667667 av_freep (& h -> slice_table_base );
668668 h -> slice_table = NULL ;
669+ av_freep (& h -> list_counts );
669670
670671 av_freep (& h -> mb2b_xy );
671672 av_freep (& h -> mb2b8_xy );
@@ -756,14 +757,15 @@ int ff_h264_alloc_tables(H264Context *h){
756757
757758 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> intra4x4_pred_mode , big_mb_num * 8 * sizeof (uint8_t ), fail )
758759
759- FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> non_zero_count , big_mb_num * 16 * sizeof (uint8_t ), fail )
760+ FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> non_zero_count , big_mb_num * 32 * sizeof (uint8_t ), fail )
760761 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> slice_table_base , (big_mb_num + s -> mb_stride ) * sizeof (* h -> slice_table_base ), fail )
761762 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> cbp_table , big_mb_num * sizeof (uint16_t ), fail )
762763
763764 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> chroma_pred_mode_table , big_mb_num * sizeof (uint8_t ), fail )
764765 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> mvd_table [0 ], 32 * big_mb_num * sizeof (uint16_t ), fail );
765766 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> mvd_table [1 ], 32 * big_mb_num * sizeof (uint16_t ), fail );
766767 FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> direct_table , 32 * big_mb_num * sizeof (uint8_t ) , fail );
768+ FF_ALLOCZ_OR_GOTO (h -> s .avctx , h -> list_counts , big_mb_num * sizeof (uint8_t ), fail )
767769
768770 memset (h -> slice_table_base , -1 , (big_mb_num + s -> mb_stride ) * sizeof (* h -> slice_table_base ));
769771 h -> slice_table = h -> slice_table_base + s -> mb_stride * 2 + 1 ;
@@ -945,21 +947,14 @@ int ff_h264_frame_start(H264Context *h){
945947
946948static inline void backup_mb_border (H264Context * h , uint8_t * src_y , uint8_t * src_cb , uint8_t * src_cr , int linesize , int uvlinesize , int simple ){
947949 MpegEncContext * const s = & h -> s ;
948- int i ;
949- int step = 1 ;
950- int offset = 1 ;
951- int uvoffset = 1 ;
952950 int top_idx = 1 ;
953- int skiplast = 0 ;
954951
955952 src_y -= linesize ;
956953 src_cb -= uvlinesize ;
957954 src_cr -= uvlinesize ;
958955
959956 if (!simple && FRAME_MBAFF ){
960957 if (s -> mb_y & 1 ){
961- offset = MB_MBAFF ? 1 : 17 ;
962- uvoffset = MB_MBAFF ? 1 : 9 ;
963958 if (!MB_MBAFF ){
964959 * (uint64_t * )(h -> top_borders [0 ][s -> mb_x ]+ 0 )= * (uint64_t * )(src_y + 15 * linesize );
965960 * (uint64_t * )(h -> top_borders [0 ][s -> mb_x ]+ 8 )= * (uint64_t * )(src_y + 8 + 15 * linesize );
@@ -968,39 +963,19 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
968963 * (uint64_t * )(h -> top_borders [0 ][s -> mb_x ]+ 24 )= * (uint64_t * )(src_cr + 7 * uvlinesize );
969964 }
970965 }
971- }else {
972- if (!MB_MBAFF ){
973- h -> left_border [0 ]= h -> top_borders [0 ][s -> mb_x ][15 ];
974- if (simple || !CONFIG_GRAY || !(s -> flags & CODEC_FLAG_GRAY )){
975- h -> left_border [34 ]= h -> top_borders [0 ][s -> mb_x ][16 + 7 ];
976- h -> left_border [34 + 18 ]= h -> top_borders [0 ][s -> mb_x ][16 + 8 + 7 ];
977- }
978- skiplast = 1 ;
979- }
980- offset =
981- uvoffset =
982- top_idx = MB_MBAFF ? 0 : 1 ;
983- }
984- step = MB_MBAFF ? 2 : 1 ;
966+ }else if (MB_MBAFF ){
967+ top_idx = 0 ;
968+ }else
969+ return ;
985970 }
986971
987972 // There are two lines saved, the line above the top macroblock of a pair,
988973 // and the line above the bottom macroblock
989- h -> left_border [offset ]= h -> top_borders [top_idx ][s -> mb_x ][15 ];
990- for (i = 1 ; i < 17 - skiplast ; i ++ ){
991- h -> left_border [offset + i * step ]= src_y [15 + i * linesize ];
992- }
993974
994975 * (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 0 )= * (uint64_t * )(src_y + 16 * linesize );
995976 * (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 8 )= * (uint64_t * )(src_y + 8 + 16 * linesize );
996977
997978 if (simple || !CONFIG_GRAY || !(s -> flags & CODEC_FLAG_GRAY )){
998- h -> left_border [uvoffset + 34 ]= h -> top_borders [top_idx ][s -> mb_x ][16 + 7 ];
999- h -> left_border [uvoffset + 34 + 18 ]= h -> top_borders [top_idx ][s -> mb_x ][24 + 7 ];
1000- for (i = 1 ; i < 9 - skiplast ; i ++ ){
1001- h -> left_border [uvoffset + 34 + i * step ]= src_cb [7 + i * uvlinesize ];
1002- h -> left_border [uvoffset + 34 + 18 + i * step ]= src_cr [7 + i * uvlinesize ];
1003- }
1004979 * (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 16 )= * (uint64_t * )(src_cb + 8 * uvlinesize );
1005980 * (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 24 )= * (uint64_t * )(src_cr + 8 * uvlinesize );
1006981 }
@@ -1013,21 +988,15 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
1013988 int deblock_left ;
1014989 int deblock_top ;
1015990 int mb_xy ;
1016- int step = 1 ;
1017- int offset = 1 ;
1018- int uvoffset = 1 ;
1019991 int top_idx = 1 ;
1020992
1021993 if (!simple && FRAME_MBAFF ){
1022994 if (s -> mb_y & 1 ){
1023- offset = MB_MBAFF ? 1 : 17 ;
1024- uvoffset = MB_MBAFF ? 1 : 9 ;
995+ if (! MB_MBAFF )
996+ return ;
1025997 }else {
1026- offset =
1027- uvoffset =
1028998 top_idx = MB_MBAFF ? 0 : 1 ;
1029999 }
1030- step = MB_MBAFF ? 2 : 1 ;
10311000 }
10321001
10331002 if (h -> deblocking_filter == 2 ) {
@@ -1049,14 +1018,10 @@ if(xchg)\
10491018 a= b;\
10501019b= t;
10511020
1052- if (deblock_left ){
1053- for (i = !deblock_top ; i < 16 ; i ++ ){
1054- XCHG (h -> left_border [offset + i * step ], src_y [i * linesize ], temp8 , xchg );
1055- }
1056- XCHG (h -> left_border [offset + i * step ], src_y [i * linesize ], temp8 , 1 );
1057- }
1058-
10591021 if (deblock_top ){
1022+ if (deblock_left ){
1023+ XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x - 1 ]+ 8 ), * (uint64_t * )(src_y - 7 ), temp64 , 1 );
1024+ }
10601025 XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 0 ), * (uint64_t * )(src_y + 1 ), temp64 , xchg );
10611026 XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 8 ), * (uint64_t * )(src_y + 9 ), temp64 , 1 );
10621027 if (s -> mb_x + 1 < s -> mb_width ){
@@ -1065,15 +1030,11 @@ b= t;
10651030 }
10661031
10671032 if (simple || !CONFIG_GRAY || !(s -> flags & CODEC_FLAG_GRAY )){
1068- if (deblock_left ){
1069- for (i = !deblock_top ; i < 8 ; i ++ ){
1070- XCHG (h -> left_border [uvoffset + 34 + i * step ], src_cb [i * uvlinesize ], temp8 , xchg );
1071- XCHG (h -> left_border [uvoffset + 34 + 18 + i * step ], src_cr [i * uvlinesize ], temp8 , xchg );
1072- }
1073- XCHG (h -> left_border [uvoffset + 34 + i * step ], src_cb [i * uvlinesize ], temp8 , 1 );
1074- XCHG (h -> left_border [uvoffset + 34 + 18 + i * step ], src_cr [i * uvlinesize ], temp8 , 1 );
1075- }
10761033 if (deblock_top ){
1034+ if (deblock_left ){
1035+ XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x - 1 ]+ 16 ), * (uint64_t * )(src_cb - 7 ), temp64 , 1 );
1036+ XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x - 1 ]+ 24 ), * (uint64_t * )(src_cr - 7 ), temp64 , 1 );
1037+ }
10771038 XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 16 ), * (uint64_t * )(src_cb + 1 ), temp64 , 1 );
10781039 XCHG (* (uint64_t * )(h -> top_borders [top_idx ][s -> mb_x ]+ 24 ), * (uint64_t * )(src_cr + 1 ), temp64 , 1 );
10791040 }
@@ -1103,6 +1064,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
11031064 s -> dsp .prefetch (dest_y + (s -> mb_x & 3 )* 4 * s -> linesize + 64 , s -> linesize , 4 );
11041065 s -> dsp .prefetch (dest_cb + (s -> mb_x & 7 )* s -> uvlinesize + 64 , dest_cr - dest_cb , 2 );
11051066
1067+ h -> list_counts [mb_xy ]= h -> list_count ;
1068+
11061069 if (!simple && MB_FIELD ) {
11071070 linesize = h -> mb_linesize = s -> linesize * 2 ;
11081071 uvlinesize = h -> mb_uvlinesize = s -> uvlinesize * 2 ;
@@ -1322,7 +1285,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
13221285 if (h -> cbp || IS_INTRA (mb_type ))
13231286 s -> dsp .clear_blocks (h -> mb );
13241287
1325- if (h -> deblocking_filter ) {
1288+ if (h -> deblocking_filter && 0 ) {
13261289 backup_mb_border (h , dest_y , dest_cb , dest_cr , linesize , uvlinesize , simple );
13271290 fill_caches (h , mb_type , 1 ); //FIXME don't fill stuff which isn't used by filter_mb
13281291 h -> chroma_qp [0 ] = get_chroma_qp (h , 0 , s -> current_picture .qscale_table [mb_xy ]);
@@ -2174,6 +2137,70 @@ int ff_h264_get_slice_type(H264Context *h)
21742137 }
21752138}
21762139
/**
 * Run the deblocking filter over a finished macroblock row.
 *
 * Does nothing to the picture unless h->deblocking_filter is non-zero.
 * Otherwise it visits every mb_x in [0, s->mb_width) and, for each column,
 * every mb_y in [s->mb_y, s->mb_y + FRAME_MBAFF]. For each macroblock it
 * reloads the per-macroblock decoder state that the filter reads (slice
 * number, list count, a slice type derived from the list count, field flags,
 * line sizes, chroma QPs), saves the border pixels, refills the caches and
 * calls one of the two ff_h264_filter_mb* routines.
 *
 * NOTE(review): FRAME_MBAFF is a macro defined outside this view; the loop
 * bounds read as if it evaluates to 0 or 1 -- confirm.
 *
 * On return h->slice_type is restored to its value on entry, s->mb_x is 0 and
 * s->mb_y is back to its entry value. NOTE(review): the other fields written
 * in the loop (h->slice_type_nos, h->list_count, h->slice_num, h->mb_xy,
 * h->mb_mbaff, h->mb_field_decoding_flag, h->mb_linesize, h->mb_uvlinesize,
 * h->chroma_qp[]) are left holding the values of the last macroblock visited;
 * verify that callers do not depend on their previous contents.
 *
 * @param h decoder context; s->mb_y must identify the row to filter
 */
2140+ static void loop_filter (H264Context * h ){
2141+ MpegEncContext * const s = & h -> s ;
2142+ uint8_t * dest_y , * dest_cb , * dest_cr ;
2143+ int linesize , uvlinesize , mb_x , mb_y ;
2144+ const int end_mb_y = s -> mb_y + FRAME_MBAFF ; // last row index visited by the inner loop
2145+ const int old_slice_type = h -> slice_type ; // saved because the loop overwrites h->slice_type
2146+
2147+ if (h -> deblocking_filter ) {
2148+ for (mb_x = 0 ; mb_x < s -> mb_width ; mb_x ++ ){ // outer loop walks columns, inner loop walks rows
2149+ for (mb_y = end_mb_y - FRAME_MBAFF ; mb_y <= end_mb_y ; mb_y ++ ){ // starts at the entry value of s->mb_y
2150+ int list , mb_xy , mb_type , is_complex ; // NOTE(review): 'list' is not used in this function
2151+ mb_xy = h -> mb_xy = mb_x + mb_y * s -> mb_stride ;
2152+ h -> slice_num = h -> slice_table [mb_xy ]; // slice number recorded for this macroblock
2153+ mb_type = s -> current_picture .mb_type [mb_xy ];
2154+ h -> list_count = h -> list_counts [mb_xy ]; // list count recorded for this macroblock
2155+ if (h -> list_count == 2 ){ // derive a slice type from the list count: 2 -> B, 1 -> P, anything else -> I
2156+ h -> slice_type = h -> slice_type_nos = FF_B_TYPE ;
2157+ }else if (h -> list_count == 1 ){
2158+ h -> slice_type = h -> slice_type_nos = FF_P_TYPE ;
2159+ }else
2160+ h -> slice_type = h -> slice_type_nos = FF_I_TYPE ;
2161+
2162+ if (FRAME_MBAFF ) // take the field/frame flags from the stored macroblock type
2163+ h -> mb_mbaff = h -> mb_field_decoding_flag = !!IS_INTERLACED (mb_type );
2164+
2165+ is_complex = CONFIG_SMALL || h -> is_complex || IS_INTRA_PCM (mb_type ) || s -> qscale == 0 ; //FIXME qscale might be wrong
2166+
2167+ s -> mb_x = mb_x ; // publish the current position in the context before the helper calls below
2168+ s -> mb_y = mb_y ;
2169+ dest_y = s -> current_picture .data [0 ] + (mb_x + mb_y * s -> linesize ) * 16 ; // 16 luma samples per macroblock in each direction
2170+ dest_cb = s -> current_picture .data [1 ] + (mb_x + mb_y * s -> uvlinesize ) * 8 ; // 8 chroma samples per macroblock in each direction
2171+ dest_cr = s -> current_picture .data [2 ] + (mb_x + mb_y * s -> uvlinesize ) * 8 ;
2172+ //FIXME simplify above
2173+
2174+ if (MB_FIELD ) { // field macroblock: use doubled line sizes
2175+ linesize = h -> mb_linesize = s -> linesize * 2 ;
2176+ uvlinesize = h -> mb_uvlinesize = s -> uvlinesize * 2 ;
2177+ if (mb_y & 1 ){ // odd row: move the pointers back 15 luma / 7 chroma lines //FIXME move out of this function?
2178+ dest_y -= s -> linesize * 15 ;
2179+ dest_cb -= s -> uvlinesize * 7 ;
2180+ dest_cr -= s -> uvlinesize * 7 ;
2181+ }
2182+ } else {
2183+ linesize = h -> mb_linesize = s -> linesize ;
2184+ uvlinesize = h -> mb_uvlinesize = s -> uvlinesize ;
2185+ }
2186+ backup_mb_border (h , dest_y , dest_cb , dest_cr , linesize , uvlinesize , !is_complex ); // last argument is the callee's 'simple' flag
2187+ fill_caches (h , mb_type , 1 ); //FIXME don't fill stuff which isn't used by filter_mb
2188+ h -> chroma_qp [0 ] = get_chroma_qp (h , 0 , s -> current_picture .qscale_table [mb_xy ]); // chroma QPs come from this macroblock's stored qscale
2189+ h -> chroma_qp [1 ] = get_chroma_qp (h , 1 , s -> current_picture .qscale_table [mb_xy ]);
2190+
2191+ if (is_complex && FRAME_MBAFF ) { // NOTE(review): complex macroblocks without FRAME_MBAFF take the fast filter; confirm this is intended
2192+ ff_h264_filter_mb (h , mb_x , mb_y , dest_y , dest_cb , dest_cr , linesize , uvlinesize );
2193+ } else {
2194+ ff_h264_filter_mb_fast (h , mb_x , mb_y , dest_y , dest_cb , dest_cr , linesize , uvlinesize );
2195+ }
2196+ }
2197+ }
2198+ }
2199+ h -> slice_type = old_slice_type ; // only slice_type is restored; slice_type_nos keeps the last value written in the loop
2200+ s -> mb_x = 0 ;
2201+ s -> mb_y = end_mb_y - FRAME_MBAFF ; // equals the value s->mb_y had on entry
2202+ }
2203+
21772204static int decode_slice (struct AVCodecContext * avctx , void * arg ){
21782205 H264Context * h = * (void * * )arg ;
21792206 MpegEncContext * const s = & h -> s ;
@@ -2222,6 +2249,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
22222249
22232250 if ( ++ s -> mb_x >= s -> mb_width ) {
22242251 s -> mb_x = 0 ;
2252+ loop_filter (h );
22252253 ff_draw_horiz_band (s , 16 * s -> mb_y , 16 );
22262254 ++ s -> mb_y ;
22272255 if (FIELD_OR_MBAFF_PICTURE ) {
@@ -2259,6 +2287,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
22592287
22602288 if (++ s -> mb_x >= s -> mb_width ){
22612289 s -> mb_x = 0 ;
2290+ loop_filter (h );
22622291 ff_draw_horiz_band (s , 16 * s -> mb_y , 16 );
22632292 ++ s -> mb_y ;
22642293 if (FIELD_OR_MBAFF_PICTURE ) {
0 commit comments