From 6060fe6037a28a0a09e17e065ddb9795196356ce Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:45:23 +0800 Subject: [PATCH 01/31] avcodec/bit_depth_template: add tpixel for intermediate pixel type An int is needed by motion compensation as the intermediate type for 16-bit formats Signed-off-by: Wu Jianhua --- libavcodec/bit_depth_template.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c index ca5037148a..483f9cdef1 100644 --- a/libavcodec/bit_depth_template.c +++ b/libavcodec/bit_depth_template.c @@ -30,6 +30,7 @@ # undef pixel4 # undef dctcoef # undef idctin +# undef tpixel # undef no_rnd_avg_pixel4 # undef rnd_avg_pixel4 # undef AV_RN2P @@ -52,6 +53,11 @@ # define pixel2 uint32_t # define pixel4 uint64_t # define dctcoef int32_t +#if BIT_DEPTH > 12 +# define tpixel int +#else +# define tpixel int16_t +#endif #ifdef IN_IDCT_DEPTH #if IN_IDCT_DEPTH == 32 @@ -81,6 +87,7 @@ # define pixel4 uint32_t # define dctcoef int16_t # define idctin int16_t +# define tpixel int16_t # define no_rnd_avg_pixel4 no_rnd_avg32 # define rnd_avg_pixel4 rnd_avg32 From d6015a16eb1b308a0e7545cc71e769aa32a47829 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:45:30 +0800 Subject: [PATCH 02/31] avcodec/h26x/h2656_inter_template: fix put pixel functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index 864f6c7e7d..8811aa56a5 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -26,16 +26,17 @@ #define LUMA_EXTRA_BEFORE 3 #define LUMA_EXTRA 7 -static void FUNC(put_pixels)(int16_t *dst, +static void FUNC(put_pixels)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf,
const int width) { const pixel *src = (const pixel *)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = src[x] << (14 - BIT_DEPTH); + dst[x] = (src[x] << (FFMAX(2, 14 - BIT_DEPTH))); src += src_stride; dst += MAX_PB_SIZE; } @@ -66,17 +67,13 @@ static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, pixel *dst = (pixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); - const int shift = denom + 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif - const int ox = _ox * (1 << (BIT_DEPTH - 8)); + const int ox = _ox * (1 << FFMIN(4, BIT_DEPTH - 8)); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - const int v = (src[x] << (14 - BIT_DEPTH)); + const int v = (src[x] << (FFMAX(2, 14 - BIT_DEPTH))); dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); } src += src_stride; From 6fb787d925a080b184b849a8d7ef039d64435fbd Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:45:37 +0800 Subject: [PATCH 03/31] avcodec/h26x/h2656_inter_template: fix put luma functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index 8811aa56a5..cece87f1ad 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -91,49 +91,55 @@ static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, filter[6] * src[x + 3 * stride] + \ filter[7] * src[x + 4 * stride]) -static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, +static 
void FUNC(put_luma_h)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { const pixel *src = (const pixel*)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = hf; + const int shift = FFMIN(4, BIT_DEPTH - 8); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + dst[x] = LUMA_FILTER(src, 1) >> shift; src += src_stride; dst += MAX_PB_SIZE; } } -static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, +static void FUNC(put_luma_v)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { const pixel *src = (pixel*)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = vf; + const int shift = FFMIN(4, BIT_DEPTH - 8); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); + dst[x] = LUMA_FILTER(src, src_stride) >> shift; src += src_stride; dst += MAX_PB_SIZE; } } -static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, +static void FUNC(put_luma_hv)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { - int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; - int16_t *tmp = tmp_array; + tpixel tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; + tpixel *tmp = tmp_array; const pixel *src = (const pixel*)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = hf; + const int shift = FFMIN(4, BIT_DEPTH - 8); src -= LUMA_EXTRA_BEFORE * src_stride; for (int y = 0; y < height + LUMA_EXTRA; y++) { for (int x = 0; x < width; 
x++) - tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + tmp[x] = LUMA_FILTER(src, 1) >> shift; src += src_stride; tmp += MAX_PB_SIZE; } From 06a55571aa3bc6fa79fd74887631d87c07c12e16 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:45:46 +0800 Subject: [PATCH 04/31] avcodec/h26x/h2656_inter_template: fix put chroma functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index cece87f1ad..25f11fc828 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -342,50 +342,56 @@ static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, filter[2] * src[x + stride] + \ filter[3] * src[x + 2 * stride]) -static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, +static void FUNC(put_chroma_h)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { const pixel *src = (const pixel *)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = hf; + const int shift = FFMIN(4, BIT_DEPTH - 8); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + dst[x] = CHROMA_FILTER(src, 1) >> shift; src += src_stride; dst += MAX_PB_SIZE; } } -static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, +static void FUNC(put_chroma_v)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { const pixel *src = (const pixel *)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = vf; + 
const int shift = FFMIN(4, BIT_DEPTH - 8); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); + dst[x] = CHROMA_FILTER(src, src_stride) >> shift; src += src_stride; dst += MAX_PB_SIZE; } } -static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, +static void FUNC(put_chroma_hv)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { - int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; - int16_t *tmp = tmp_array; + tpixel tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; + tpixel *tmp = tmp_array; const pixel *src = (const pixel *)_src; + tpixel *dst = (tpixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = hf; + const int shift = FFMIN(4, BIT_DEPTH - 8); src -= CHROMA_EXTRA_BEFORE * src_stride; for (int y = 0; y < height + CHROMA_EXTRA; y++) { for (int x = 0; x < width; x++) - tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + tmp[x] = CHROMA_FILTER(src, 1) >> shift; src += src_stride; tmp += MAX_PB_SIZE; } From 1707be2a47a25479f8761a32cbe0e9399f45dc30 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:45:54 +0800 Subject: [PATCH 05/31] avcodec/h26x/h2656_inter_template: fix put uni luma functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 28 ++++++++------------------ 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index 25f11fc828..49f45b8d62 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -163,16 +163,12 @@ static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const 
int8_t *filter = hf; - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + const int val = LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8); dst[x] = av_clip_pixel((val + offset) >> shift); } src += src_stride; @@ -190,16 +186,12 @@ static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = vf; - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); + const int val = LUMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8); dst[x] = av_clip_pixel((val + offset) >> shift); } src += src_stride; @@ -211,24 +203,20 @@ static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { - int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; - int16_t *tmp = tmp_array; + tpixel tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; + tpixel *tmp = tmp_array; const pixel *src = (const pixel*)_src; pixel *dst = (pixel *)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int8_t *filter = hf; - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif src -= LUMA_EXTRA_BEFORE * src_stride; for (int y = 
0; y < height + LUMA_EXTRA; y++) { for (int x = 0; x < width; x++) - tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + tmp[x] = LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8); src += src_stride; tmp += MAX_PB_SIZE; } From f159c0784fc809d72700297248cf6af7aba66e9c Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:46:05 +0800 Subject: [PATCH 06/31] avcodec/h26x/h2656_inter_template: fix put uni chroma functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 28 ++++++++------------------ 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index 49f45b8d62..c5a92037a1 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -404,16 +404,12 @@ static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = hf; - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); + dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8)) + offset) >> shift); src += src_stride; dst += dst_stride; } @@ -428,16 +424,12 @@ static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = vf; - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif for (int y = 0; y < height; y++) { for 
(int x = 0; x < width; x++) - dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); + dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8)) + offset) >> shift); src += src_stride; dst += dst_stride; } @@ -447,25 +439,21 @@ static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int8_t *hf, const int8_t *vf, const int width) { - int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; - int16_t *tmp = tmp_array; + tpixel tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; + tpixel *tmp = tmp_array; const pixel *src = (const pixel *)_src; pixel *dst = (pixel *)_dst; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = hf; - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif src -= CHROMA_EXTRA_BEFORE * src_stride; for (int y = 0; y < height + CHROMA_EXTRA; y++) { for (int x = 0; x < width; x++) - tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + tmp[x] = CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8); src += src_stride; tmp += MAX_PB_SIZE; } From 78b47cced3f504c4cf012f2a11a5882f515b2237 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:46:12 +0800 Subject: [PATCH 07/31] avcodec/h26x/h2656_inter_template: fix put uni luma w functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 30 ++++++++------------------ 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index c5a92037a1..fee561b8e2 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -245,17 +245,13 @@ static void 
FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = hf; - const int ox = _ox * (1 << (BIT_DEPTH - 8)); - const int shift = denom + 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int ox = _ox * (1 << FFMIN(4, BIT_DEPTH - 8)); + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); src += src_stride; dst += dst_stride; } @@ -271,17 +267,13 @@ static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = vf; - const int ox = _ox * (1 << (BIT_DEPTH - 8)); - const int shift = denom + 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int ox = _ox * (1 << FFMIN(4, BIT_DEPTH - 8)); + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) - dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); src += src_stride; dst += dst_stride; } @@ -298,18 +290,14 @@ static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = hf; - const int ox = _ox * (1 << (BIT_DEPTH - 8)); - const int 
shift = denom + 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int ox = _ox * (1 << FFMIN(4, BIT_DEPTH - 8)); + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif src -= LUMA_EXTRA_BEFORE * src_stride; for (int y = 0; y < height + LUMA_EXTRA; y++) { for (int x = 0; x < width; x++) - tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + tmp[x] = LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8); src += src_stride; tmp += MAX_PB_SIZE; } From 0bca8ca611ee3c16d463f919ee80daf0882196e6 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:46:20 +0800 Subject: [PATCH 08/31] avcodec/h26x/h2656_inter_template: fix put uni chroma w functions for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/h26x/h2656_inter_template.c | 30 ++++++++------------------ 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c index fee561b8e2..506f9a9704 100644 --- a/libavcodec/h26x/h2656_inter_template.c +++ b/libavcodec/h26x/h2656_inter_template.c @@ -466,17 +466,13 @@ static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = hf; - const int shift = denom + 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif - ox = ox * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << FFMIN(4, BIT_DEPTH - 8)); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); } dst += dst_stride; src += src_stride; @@ -493,17 +489,13 @@ static void 
FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = vf; - const int shift = denom + 14 - BIT_DEPTH; - const int ox = _ox * (1 << (BIT_DEPTH - 8)); -#if BIT_DEPTH < 14 + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); + const int ox = _ox * (1 << FFMIN(4, BIT_DEPTH - 8)); int offset = 1 << (shift - 1); -#else - int offset = 0; -#endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); } dst += dst_stride; src += src_stride; @@ -521,18 +513,14 @@ static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int8_t *filter = hf; - const int shift = denom + 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif src -= CHROMA_EXTRA_BEFORE * src_stride; for (int y = 0; y < height + CHROMA_EXTRA; y++) { for (int x = 0; x < width; x++) - tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); + tmp[x] = CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8); src += src_stride; tmp += MAX_PB_SIZE; } @@ -540,7 +528,7 @@ static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; filter = vf; - ox = ox * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << FFMIN(4, BIT_DEPTH - 8)); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); From a99d07dd0c2a18bbddfcb5d675a27ab75645202c 
Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:46:50 +0800 Subject: [PATCH 09/31] avcodec/vvc/inter_template: fix avg function for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index aee4994c17..535f5e1e75 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -183,8 +183,10 @@ static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_st #undef TMP_STRIDE static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, - const int16_t *src0, const int16_t *src1, const int width, const int height) + const int16_t *_src0, const int16_t *_src1, const int width, const int height) { + const tpixel *src0 = (const tpixel *)_src0; + const tpixel *src1 = (const tpixel *)_src1; pixel *dst = (pixel*)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int shift = FFMAX(3, 15 - BIT_DEPTH); From 848b4602ff948c9fe6ce2d427e8d45a54e381e2c Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:46:58 +0800 Subject: [PATCH 10/31] avcodec/vvc/inter_template: fix w_avg function for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 535f5e1e75..dbb11a0fba 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -202,9 +202,11 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, } static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, - const int16_t *src0, const int16_t *src1, const int width, const int height, + const int16_t *_src0, const int16_t *_src1, const int width, const int height, const int denom, const int w0, const int w1, const int o0, const int o1) { + const tpixel *src0 = (const tpixel 
*)_src0; + const tpixel *src1 = (const tpixel *)_src1; pixel *dst = (pixel*)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int shift = denom + FFMAX(3, 15 - BIT_DEPTH); From 7b0bb24b877e92922a01990d1ae0e6ea26424d51 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 30 May 2025 02:55:58 +0800 Subject: [PATCH 11/31] avcodec/vvc/inter_template: fix put_scaled function for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index dbb11a0fba..89b09797fa 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -29,10 +29,10 @@ static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _ds const int _x, const int _y, const int dx, const int dy, const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma) { - int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE]; - int16_t *tmp = tmp_array; - pixel *dst = (pixel*)_dst; - int16_t *dst16 = (int16_t*)_dst; + tpixel tmp_array[TMP_STRIDE * MAX_PB_SIZE]; + tpixel *tmp = tmp_array; + pixel *dst = (pixel *)_dst; + tpixel *dst16 = (tpixel *)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const int shift = FFMAX(2, 14 - BIT_DEPTH); From 80370c793e9579632001dc05617ba36a43a26512 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:05 +0800 Subject: [PATCH 12/31] avcodec/vvc/inter_template: fix put_gpm function for high bitdepth This commit fixed the decoding process for geometric partitioning mode inter blocks Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 89b09797fa..501b49b91f 100644 --- 
a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -241,12 +241,14 @@ static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride, static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride, const int width, const int height, - const int16_t *src0, const int16_t *src1, + const int16_t *_src0, const int16_t *_src1, const uint8_t *weights, const int step_x, const int step_y) { - const int shift = FFMAX(5, 17 - BIT_DEPTH); - const int offset = 1 << (shift - 1); - pixel *dst = (pixel *)_dst; + const tpixel *src0 = (const tpixel *)_src0; + const tpixel *src1 = (const tpixel *)_src1; + pixel *dst = (pixel *)_dst; + const int shift = FFMAX(5, 17 - BIT_DEPTH); + const int offset = 1 << (shift - 1); dst_stride /= sizeof(pixel); for (int y = 0; y < height; y++) { From 84bb2f6df6f256c2cc58debab10bfc42150fa02b Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:12 +0800 Subject: [PATCH 13/31] avcodec/vvc/inter_template: fix bdof_fetch_samples function for high bitdepth This commit fixed the fetching samples process for Bi-directional optical flow prediction process Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 501b49b91f..cf6e287c1c 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -271,25 +271,25 @@ static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const p const int y_off = (y_frac >> 3) - 1; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride; - int16_t *dst = _dst - 1 - MAX_PB_SIZE; - const int shift = 14 - BIT_DEPTH; + tpixel *dst = (tpixel *)_dst - 1 - MAX_PB_SIZE; + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int bdof_width = width + 2 * BDOF_BORDER_EXT; // top for (int i = 0; i < bdof_width; i++) - dst[i] = src[i] << 
shift; + dst[i] = (src[i] << shift); dst += MAX_PB_SIZE; src += src_stride; for (int i = 0; i < height; i++) { - dst[0] = src[0] << shift; - dst[1 + width] = src[1 + width] << shift; + dst[0] = (src[0] << shift); + dst[1 + width] = (src[1 + width] << shift); dst += MAX_PB_SIZE; src += src_stride; } for (int i = 0; i < bdof_width; i++) - dst[i] = src[i] << shift; + dst[i] = (src[i] << shift); } //8.5.6.3.3 Luma integer sample fetching process From 0ab7d5e33dad52743bc88af362f6d2ba98f5d539 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:19 +0800 Subject: [PATCH 14/31] avcodec/vvc/inter_template: fix prof_grad_filter function for high bitdepth This commit fixed the gradient filter for prediction refinement with optical flow process Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index cf6e287c1c..7c264142bf 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -298,14 +298,13 @@ static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdif FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); } -static void FUNC(prof_grad_filter)(int16_t *gradient_h, int16_t *gradient_v, const ptrdiff_t gradient_stride, - const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height) +static void FUNC(prof_grad_filter)(tpixel *gradient_h, tpixel *gradient_v, const ptrdiff_t gradient_stride, + const tpixel *src, const ptrdiff_t src_stride, const int width, const int height) { - const int shift = 6; - const int16_t *src = _src; + const int shift = 6; for (int y = 0; y < height; y++) { - const int16_t *p = src; + const tpixel *p = src; for (int x = 0; x < width; x++) { gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift); gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> 
shift); From ddf02de339077f8636f787d14839dfc2d13960df Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:28 +0800 Subject: [PATCH 15/31] avcodec/vvc/inter_template: fix apply_prof function for high bitdepth This commit fixed the prediction refinement with optical flow process Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 7c264142bf..694e4c45d8 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -316,12 +316,15 @@ static void FUNC(prof_grad_filter)(tpixel *gradient_h, tpixel *gradient_v, const } } -static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) +static void FUNC(apply_prof)(int16_t *_dst, const int16_t *_src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) { - const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit + const tpixel *src = (const tpixel *)_src; + tpixel *dst = (tpixel *)_dst; + const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit + + tpixel gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; + tpixel gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; - int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; - int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { From ce3376d3c1a26b14a6361b735cd3818de5c4b6a9 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:35 +0800 Subject: [PATCH 16/31] avcodec/vvc/inter_template: fix apply_prof_uni function for high bitdepth This commit fixed the prediction refinement with optical flow process Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 16 
++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 694e4c45d8..0bddc8f471 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -333,26 +333,23 @@ static void FUNC(apply_prof)(int16_t *_dst, const int16_t *_src, const int16_t * const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; const int val = src[x] + av_clip(di, -limit, limit - 1); dst[x] = val; - } src += MAX_PB_SIZE; dst += MAX_PB_SIZE; } } -static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) +static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) { const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit + const tpixel *src = (const tpixel *)_src; pixel *dst = (pixel*)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); - const int shift = 14 - BIT_DEPTH; -#if BIT_DEPTH < 14 + const int shift = FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); -#else - const int offset = 0; -#endif - int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; - int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; + + tpixel gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; + tpixel gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); @@ -362,7 +359,6 @@ static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, con const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; const int val = src[x] + av_clip(di, -limit, limit - 1); dst[x] = av_clip_pixel((val + offset) >> shift); - } src += MAX_PB_SIZE; dst += dst_stride; From 
20f48b02936a84890cd6e9f615196907821338a2 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 30 May 2025 02:20:36 +0800 Subject: [PATCH 17/31] avcodec/vvc/inter_template: fix apply_prof_uni_w function for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 0bddc8f471..64e5fab7f5 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -366,17 +366,18 @@ static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, con } static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride, - const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y, + const int16_t *_src, const int16_t *diff_mv_x, const int16_t *diff_mv_y, const int denom, const int wx, const int _ox) { const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit + const tpixel *src = (const tpixel *)_src; pixel *dst = (pixel*)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); const int offset = 1 << (shift - 1); const int ox = _ox * (1 << (BIT_DEPTH - 8)); - int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; - int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; + tpixel gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; + tpixel gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); From 71a4db03990f53a00a54b4776433c8988688a169 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:43 +0800 Subject: [PATCH 18/31] avcodec/vvc/inter_template: fix apply_bdof function for high bitdepth This commit fixed the Bi-directional optical flow prediction process Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter_template.c 
| 57 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index 64e5fab7f5..45f3640d95 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -393,9 +393,9 @@ static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride, } } -static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1, +static void FUNC(derive_bdof_vx_vy)(const tpixel *_src0, const tpixel *_src1, const int pad_left, const int pad_top, const int pad_right, const int pad_bottom, - const int16_t **gradient_h, const int16_t **gradient_v, + const tpixel **gradient_h, const tpixel **gradient_v, int* vx, int* vy) { const int shift2 = 4; @@ -403,17 +403,22 @@ static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1, const int thres = 1 << 4; int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0; + const tpixel *gh0 = gradient_h[0]; + const tpixel *gv0 = gradient_v[0]; + const tpixel *gh1 = gradient_h[1]; + const tpixel *gv1 = gradient_v[1]; + for (int y = -1; y < BDOF_MIN_BLOCK_SIZE + 1; y++) { - const int dy = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last row - const int16_t *src0 = _src0 + dy * MAX_PB_SIZE; - const int16_t *src1 = _src1 + dy * MAX_PB_SIZE; + const int dy = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last row + const tpixel *src0 = _src0 + dy * MAX_PB_SIZE; + const tpixel *src1 = _src1 + dy * MAX_PB_SIZE; for (int x = -1; x < BDOF_MIN_BLOCK_SIZE + 1; x++) { const int dx = x + (pad_left && x < 0) - (pad_right && x == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last col const int diff = (src0[dx] >> shift2) - (src1[dx] >> shift2); const int idx = BDOF_BLOCK_SIZE * dy + dx; - const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3; - const int tempv = (gradient_v[0][idx] 
+ gradient_v[1][idx]) >> shift3; + const int temph = (gh0[idx] + gh1[idx]) >> shift3; + const int tempv = (gv0[idx] + gv1[idx]) >> shift3; sgx2 += FFABS(temph); sgy2 += FFABS(tempv); @@ -426,11 +431,11 @@ static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1, *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0; } -static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, - const int16_t **gh, const int16_t **gv, const int vx, const int vy) +static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const tpixel *src0, const tpixel *src1, + const tpixel **gh, const tpixel **gv, const int vx, const int vy) { - const int shift4 = 15 - BIT_DEPTH; - const int offset4 = 1 << (shift4 - 1); + const int shift4 = FFMAX(3, 15 - BIT_DEPTH); + const int offset4 = 1 << (shift4 - 1); for (int y = 0; y < BDOF_MIN_BLOCK_SIZE; y++) { for (int x = 0; x < BDOF_MIN_BLOCK_SIZE; x++) { @@ -447,27 +452,31 @@ static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, c static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src0, const int16_t *_src1, const int block_w, const int block_h) { - int16_t gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE]; - int16_t gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE]; + tpixel gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE]; + tpixel gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE]; int vx, vy; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); - pixel* dst = (pixel*)_dst; + const tpixel *src0 = (const tpixel *)_src0; + const tpixel *src1 = (const tpixel *)_src1; + pixel *dst = (pixel *)_dst; FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_BLOCK_SIZE, - _src0, MAX_PB_SIZE, block_w, block_h); + src0, MAX_PB_SIZE, block_w, block_h); FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_BLOCK_SIZE, - _src1, 
MAX_PB_SIZE, block_w, block_h); + src1, MAX_PB_SIZE, block_w, block_h); for (int y = 0; y < block_h; y += BDOF_MIN_BLOCK_SIZE) { for (int x = 0; x < block_w; x += BDOF_MIN_BLOCK_SIZE) { - const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x; - const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x; - pixel *d = dst + x; - const int idx = BDOF_BLOCK_SIZE * y + x; - const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx }; - const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx }; - FUNC(derive_bdof_vx_vy)(src0, src1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy); - FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy); + const tpixel *s0 = src0 + y * MAX_PB_SIZE + x; + const tpixel *s1 = src1 + y * MAX_PB_SIZE + x; + + pixel *d = dst + x; + const int idx = BDOF_BLOCK_SIZE * y + x; + const tpixel *gh[] = { gradient_h[0] + idx, gradient_h[1] + idx }; + const tpixel *gv[] = { gradient_v[0] + idx, gradient_v[1] + idx }; + + FUNC(derive_bdof_vx_vy)(s0, s1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy); + FUNC(apply_bdof_min_block)(d, dst_stride, s0, s1, gh, gv, vx, vy); } dst += BDOF_MIN_BLOCK_SIZE * dst_stride; } From c6c326eaec6c9c6c898ba16f2dfde8f9b1092877 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:46:28 +0800 Subject: [PATCH 19/31] avcodec/vvc/ctu: *2 for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/ctu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h index e37bacf9dd..b63fd3e65b 100644 --- a/libavcodec/vvc/ctu.h +++ b/libavcodec/vvc/ctu.h @@ -391,9 +391,9 @@ typedef struct VVCLocalContext { /* *2 for high bit depths */ DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE]; - DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * 
MAX_PB_SIZE]; - DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE]; + DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; + DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; + DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; From 6e1863124ea8c2843f52dd96e2d882fbe03af80b Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:49 +0800 Subject: [PATCH 20/31] avcodec/vvc/inter: multiply the memory size of temporary buffer by 2 for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/inter.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c index 64a9dd1e46..be8b9c8fe6 100644 --- a/libavcodec/vvc/inter.c +++ b/libavcodec/vvc/inter.c @@ -30,6 +30,8 @@ #define PROF_TEMP_OFFSET (MAX_PB_SIZE + 32) static const int bcw_w_lut[] = {4, 5, 3, 10, -2}; +#define HIGHBD_MULTIPLIER(bd) (((bd) + 2) >> 3) + static void subpic_get_rect(VVCRect *r, const VVCFrame *src_frame, const int subpic_idx, const int is_chroma) { const VVCSPS *sps = src_frame->sps; @@ -319,8 +321,9 @@ static void mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride, const int hs = fc->ps.sps->hshift[c_idx]; const int vs = fc->ps.sps->vshift[c_idx]; const int idx = av_log2(block_w) - 1; + const int mult = HIGHBD_MULTIPLIER(fc->ps.sps->bit_depth); const VVCFrame *refs[] = { ref0, ref1 }; - int16_t *tmp[] = { lc->tmp + sb_bdof_flag * PROF_TEMP_OFFSET, lc->tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET }; + int16_t *tmp[] = { lc->tmp + sb_bdof_flag * PROF_TEMP_OFFSET * mult, lc->tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET * mult }; int denom, w0, w1, o0, o1; const int 
weight_flag = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, pu->dmvr_flag); const int is_chroma = !!c_idx; @@ -497,7 +500,7 @@ static void luma_prof_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst const VVCFrameContext *fc = lc->fc; const uint8_t *src = ref->frame->data[LUMA]; ptrdiff_t src_stride = ref->frame->linesize[LUMA]; - uint16_t *prof_tmp = lc->tmp + PROF_TEMP_OFFSET; + int16_t *prof_tmp = lc->tmp + PROF_TEMP_OFFSET * HIGHBD_MULTIPLIER(fc->ps.sps->bit_depth); const int idx = av_log2(block_w) - 1; const int lx = mvf->pred_flag - PF_L0; const Mv *mv = mvf->mv + lx; @@ -540,7 +543,7 @@ static void luma_prof(VVCLocalContext *lc, int16_t *dst, const VVCFrame *ref, const int oy = y_off + (mv->y >> 4); const int idx = av_log2(block_w) - 1; const int is_chroma = 0; - uint16_t *prof_tmp = lc->tmp2 + PROF_TEMP_OFFSET; + uint16_t *prof_tmp = lc->tmp2 + PROF_TEMP_OFFSET * HIGHBD_MULTIPLIER(fc->ps.sps->bit_depth); ptrdiff_t src_stride = ref->frame->linesize[LUMA]; const uint8_t *src = ref->frame->data[LUMA]; const int8_t *hf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][mx]; From c6a84a4ddfc9b7fa87b8caf558e248208a09abdb Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:47:55 +0800 Subject: [PATCH 21/31] avcodec/vvc/dsp: add get_clip_from_idx for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/dsp.h | 1 + libavcodec/vvc/filter.c | 14 ++------------ libavcodec/vvc/filter_template.c | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h index ae22900931..44a4b1dc80 100644 --- a/libavcodec/vvc/dsp.h +++ b/libavcodec/vvc/dsp.h @@ -165,6 +165,7 @@ typedef struct VVCALFDSPContext { int vb_pos, int *gradient_tmp); void (*recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, const int *class_idx, const int *transpose_idx, int size, const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt); + void 
(*get_clip_from_idx)(int16_t *clip, const uint8_t *clip_idx); } VVCALFDSPContext; typedef struct VVCDSPContext { diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c index 3815668bcf..070de50c69 100644 --- a/libavcodec/vvc/filter.c +++ b/libavcodec/vvc/filter.c @@ -1036,14 +1036,6 @@ static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *sr fc->vvcdsp.alf.filter[LUMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos); } -static int alf_clip_from_idx(const VVCFrameContext *fc, const int idx) -{ - const VVCSPS *sps = fc->ps.sps; - const int offset[] = {0, 3, 5, 7}; - - return 1 << (sps->bit_depth - offset[idx]); -} - static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src, const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int c_idx, const int width, const int height, const int vb_pos, const ALFParams *alf) @@ -1053,11 +1045,9 @@ static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t * const VVCALF *aps = fc->ps.alf_list[rsh->sh_alf_aps_id_chroma]; const int idx = alf->alf_ctb_filter_alt_idx[c_idx - 1]; const int16_t *coeff = aps->chroma_coeff[idx]; - int16_t clip[ALF_NUM_COEFF_CHROMA]; - - for (int i = 0; i < ALF_NUM_COEFF_CHROMA; i++) - clip[i] = alf_clip_from_idx(fc, aps->chroma_clip_idx[idx][i]); + int16_t clip[ALF_NUM_COEFF_CHROMA * 2]; + fc->vvcdsp.alf.get_clip_from_idx(clip, aps->chroma_clip_idx[idx]); fc->vvcdsp.alf.filter[CHROMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos); } diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c index 6dd7310089..c9bc7cdb0c 100644 --- a/libavcodec/vvc/filter_template.c +++ b/libavcodec/vvc/filter_template.c @@ -397,6 +397,16 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, } } +static void FUNC(alf_get_clip_from_idx)(int16_t *_clip, const uint8_t *clip_idx) +{ + tpixel *clip = (tpixel *)_clip; + const int offset[] = { 0, 3, 5, 7 }; 
+ + for (int i = 0; i < ALF_NUM_COEFF_CHROMA; i++) { + clip[i] = 1 << (BIT_DEPTH - offset[clip_idx[i]]); + } +} + #undef ALF_DIR_HORZ #undef ALF_DIR_VERT #undef ALF_DIR_DIGA0 @@ -854,9 +864,10 @@ static void FUNC(ff_vvc_sao_dsp_init)(VVCSAODSPContext *const sao) static void FUNC(ff_vvc_alf_dsp_init)(VVCALFDSPContext *const alf) { - alf->filter[LUMA] = FUNC(alf_filter_luma); - alf->filter[CHROMA] = FUNC(alf_filter_chroma); - alf->filter_cc = FUNC(alf_filter_cc); - alf->classify = FUNC(alf_classify); + alf->filter[LUMA] = FUNC(alf_filter_luma); + alf->filter[CHROMA] = FUNC(alf_filter_chroma); + alf->filter_cc = FUNC(alf_filter_cc); + alf->classify = FUNC(alf_classify); alf->recon_coeff_and_clip = FUNC(alf_recon_coeff_and_clip); + alf->get_clip_from_idx = FUNC(alf_get_clip_from_idx); } From d1f8b53e7e92e20a1c1a090241da0784f45b6a9d Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:48:02 +0800 Subject: [PATCH 22/31] avcodec/vvc/filter_template: fix alf_recon_coeff_and_clip function for high bitdepth Signed-off-by: Wu Jianhua --- libavcodec/vvc/filter.c | 2 +- libavcodec/vvc/filter_template.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c index 070de50c69..75fb8e147e 100644 --- a/libavcodec/vvc/filter.c +++ b/libavcodec/vvc/filter.c @@ -1030,7 +1030,7 @@ static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *sr int16_t *clip = (int16_t *)lc->tmp1; av_assert0(ALF_MAX_FILTER_SIZE <= sizeof(lc->tmp)); - av_assert0(ALF_MAX_FILTER_SIZE * sizeof(int16_t) <= sizeof(lc->tmp1)); + av_assert0(ALF_MAX_FILTER_SIZE * sizeof(int16_t) * fc->ps.sps->bit_depth >> 3 <= sizeof(lc->tmp1)); alf_get_coeff_and_clip(lc, coeff, clip, src, src_stride, width, height, vb_pos, alf); fc->vvcdsp.alf.filter[LUMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos); diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c index 
c9bc7cdb0c..a72d4a2f47 100644 --- a/libavcodec/vvc/filter_template.c +++ b/libavcodec/vvc/filter_template.c @@ -370,7 +370,7 @@ static void FUNC(alf_classify)(int *class_idx, int *transpose_idx, } -static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, +static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *_clip, const int *class_idx, const int *transpose_idx, const int size, const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt) { @@ -381,10 +381,12 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6 }, }; - const int16_t clip_set[] = { + const tpixel clip_set[] = { 1 << BIT_DEPTH, 1 << (BIT_DEPTH - 3), 1 << (BIT_DEPTH - 5), 1 << (BIT_DEPTH - 7) }; + tpixel *clip = (tpixel *)_clip; + for (int i = 0; i < size; i++) { const int16_t *src_coeff = coeff_set + class_to_filt[class_idx[i]] * ALF_NUM_COEFF_LUMA; const uint8_t *clip_idx = clip_idx_set + class_idx[i] * ALF_NUM_COEFF_LUMA; From b10d9ba2501bca851b6178c51943ef8d2f2e730d Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:48:09 +0800 Subject: [PATCH 23/31] avcodec/vvc/filter_template: fix alf_filter_luma function for high bitdepth This commit fixed Coding tree block filtering process for luma samples Signed-off-by: Wu Jianhua --- libavcodec/vvc/filter_template.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c index a72d4a2f47..708232e083 100644 --- a/libavcodec/vvc/filter_template.c +++ b/libavcodec/vvc/filter_template.c @@ -35,15 +35,16 @@ static void FUNC(lmcs_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const in } } -static av_always_inline int16_t FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, int16_t clip) +static av_always_inline tpixel FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, tpixel clip) { return av_clip(v0 - curr, -clip, clip) + av_clip(v1 - curr, -clip, clip); } 
static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, - const int width, const int height, const int16_t *filter, const int16_t *clip, const int vb_pos) + const int width, const int height, const int16_t *filter, const int16_t *_clip, const int vb_pos) { const pixel *src = (pixel *)_src; + const tpixel *clip = (const tpixel *)_clip; const int shift = 7; const int offset = 1 << ( shift - 1 ); const int vb_above = vb_pos - 4; From 2f07dce26361594d026b9c505f6692a71c472112 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Tue, 15 Apr 2025 04:43:01 +0800 Subject: [PATCH 24/31] avcodec/vvc/filter_template: fix alf_filter_chroma function for high bitdepth This commit fixed Coding tree block filtering process for chroma samples Signed-off-by: Wu Jianhua --- libavcodec/vvc/filter_template.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c index 708232e083..3e455c37dd 100644 --- a/libavcodec/vvc/filter_template.c +++ b/libavcodec/vvc/filter_template.c @@ -136,13 +136,14 @@ static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uin } static void FUNC(alf_filter_chroma)(uint8_t* _dst, ptrdiff_t dst_stride, const uint8_t* _src, ptrdiff_t src_stride, - const int width, const int height, const int16_t* filter, const int16_t* clip, const int vb_pos) + const int width, const int height, const int16_t *filter, const int16_t *_clip, const int vb_pos) { - const pixel *src = (pixel *)_src; - const int shift = 7; - const int offset = 1 << ( shift - 1 ); - const int vb_above = vb_pos - 2; - const int vb_below = vb_pos + 1; + const pixel *src = (pixel *)_src; + const tpixel *clip = (const tpixel *)_clip; + const int shift = 7; + const int offset = 1 << ( shift - 1 ); + const int vb_above = vb_pos - 2; + const int vb_below = vb_pos + 1; dst_stride /= sizeof(pixel); src_stride /= sizeof(pixel); From 
d1a3fcbf44bd3c74a00f381049ba2cbc1ee4d64b Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 30 May 2025 02:50:34 +0800 Subject: [PATCH 25/31] avcodec/vvc/dsp_template: remove warning conversion from size_t to const int Signed-off-by: Wu Jianhua --- libavcodec/vvc/dsp_template.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libavcodec/vvc/dsp_template.c b/libavcodec/vvc/dsp_template.c index 13bd8cd4a1..c61ddec497 100644 --- a/libavcodec/vvc/dsp_template.c +++ b/libavcodec/vvc/dsp_template.c @@ -32,9 +32,8 @@ static void FUNC(add_residual)(uint8_t *_dst, const int *res, const int w, const int h, const ptrdiff_t _stride) { - pixel *dst = (pixel *)_dst; - - const int stride = _stride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + const ptrdiff_t stride = _stride / sizeof(pixel); for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { From ce17c0ddfd42710b036a59174a45b22ad5f63cb0 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 2 May 2025 20:27:29 +0800 Subject: [PATCH 26/31] avcodec/vvc/dsp: init dsp template for 14 bits Signed-off-by: Wu Jianhua --- libavcodec/vvc/dsp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c index af392f2754..51aa1cd6e2 100644 --- a/libavcodec/vvc/dsp.c +++ b/libavcodec/vvc/dsp.c @@ -83,6 +83,10 @@ typedef struct IntraEdgeParams { #include "dsp_template.c" #undef BIT_DEPTH +#define BIT_DEPTH 14 +#include "dsp_template.c" +#undef BIT_DEPTH + void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth) { #undef FUNC @@ -98,6 +102,9 @@ void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth) FUNC(ff_vvc_alf_dsp_init, depth)(&vvcdsp->alf); \ switch (bit_depth) { + case 14: + VVC_DSP(14); + break; case 12: VVC_DSP(12); break; From 376d1fcede6ab07e4a61ba73445375c509cd3628 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 2 May 2025 20:28:47 +0800 Subject: [PATCH 27/31] avcodec/vvc/dsp: init dsp template for 16 bits Signed-off-by: Wu Jianhua --- 
libavcodec/vvc/dsp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c index 51aa1cd6e2..62810c9222 100644 --- a/libavcodec/vvc/dsp.c +++ b/libavcodec/vvc/dsp.c @@ -87,6 +87,10 @@ typedef struct IntraEdgeParams { #include "dsp_template.c" #undef BIT_DEPTH +#define BIT_DEPTH 16 +#include "dsp_template.c" +#undef BIT_DEPTH + void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth) { #undef FUNC @@ -102,6 +106,9 @@ void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth) FUNC(ff_vvc_alf_dsp_init, depth)(&vvcdsp->alf); \ switch (bit_depth) { + case 16: + VVC_DSP(16); + break; case 14: VVC_DSP(14); break; From 7a12c3a1a96dbafa0366df2d85623df782e7ec3a Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 2 May 2025 20:25:13 +0800 Subject: [PATCH 28/31] avcodec/vvc/ps: support 14 bits pixel formats Signed-off-by: Wu Jianhua --- libavcodec/vvc/ps.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c index d9f46b219a..9258dea9e5 100644 --- a/libavcodec/vvc/ps.c +++ b/libavcodec/vvc/ps.c @@ -32,6 +32,13 @@ #include "ps.h" #include "dec.h" +static enum AVPixelFormat chroma_idc_formats[][4] = { + { AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P }, + { AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10 }, + { AV_PIX_FMT_GRAY12, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12 }, + { AV_PIX_FMT_GRAY14, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14 }, +}; + static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx) { const H266RawSPS *r = sps->r; @@ -39,26 +46,16 @@ static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx) switch (sps->bit_depth) { case 8: - if (r->sps_chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY8; - if (r->sps_chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P; - if (r->sps_chroma_format_idc == 2) 
sps->pix_fmt = AV_PIX_FMT_YUV422P; - if (r->sps_chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P; - break; case 10: - if (r->sps_chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY10; - if (r->sps_chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P10; - if (r->sps_chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P10; - if (r->sps_chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P10; - break; case 12: - if (r->sps_chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY12; - if (r->sps_chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P12; - if (r->sps_chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P12; - if (r->sps_chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P12; + case 14: + enum AVPixelFormat *formats = chroma_idc_formats[sps->r->sps_bitdepth_minus8 >> 1]; + sps->pix_fmt = formats[sps->r->sps_chroma_format_idc]; break; + default: av_log(log_ctx, AV_LOG_ERROR, - "The following bit-depths are currently specified: 8, 10, 12 bits, " + "The following bit-depths are currently specified: 8, 10, 12, 14 bits, " "chroma_format_idc is %d, depth is %d\n", r->sps_chroma_format_idc, sps->bit_depth); return AVERROR_INVALIDDATA; From f84a9b13be3fd39901eaf32fe91ec815c96d96d3 Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 2 May 2025 20:25:49 +0800 Subject: [PATCH 29/31] avcodec/vvc/ps: support 16 bits pixel formats Signed-off-by: Wu Jianhua --- libavcodec/vvc/ps.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c index 9258dea9e5..c6f73594c0 100644 --- a/libavcodec/vvc/ps.c +++ b/libavcodec/vvc/ps.c @@ -37,6 +37,7 @@ static enum AVPixelFormat chroma_idc_formats[][4] = { { AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10 }, { AV_PIX_FMT_GRAY12, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12 }, { AV_PIX_FMT_GRAY14, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14 }, + { AV_PIX_FMT_GRAY16, 
AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16 }, }; static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx) @@ -49,13 +50,14 @@ static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx) case 10: case 12: case 14: + case 16: enum AVPixelFormat *formats = chroma_idc_formats[sps->r->sps_bitdepth_minus8 >> 1]; sps->pix_fmt = formats[sps->r->sps_chroma_format_idc]; break; default: av_log(log_ctx, AV_LOG_ERROR, - "The following bit-depths are currently specified: 8, 10, 12, 14 bits, " + "The following bit-depths are currently specified: 8, 10, 12, 14, 16 bits, " "chroma_format_idc is %d, depth is %d\n", r->sps_chroma_format_idc, sps->bit_depth); return AVERROR_INVALIDDATA; From 888d04cd98e91f4d001ef6d8ea79b41933b971da Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Fri, 30 May 2025 00:39:07 +0800 Subject: [PATCH 30/31] Changelog: the VVC decoder supports decoding 14/16 bits bitstreams passed files: 16b400P16_A_Sony_2.bit 16b400P16_B_Sony_2.bit 16b400P16_C_Sony_2.bit 16b400P16_D_Sony_2.bit 16b400P16_E_Sony_2.bit 16b420P16_A_Sony_2.bit 16b420P16_B_Sony_2.bit 16b420P16_C_Sony_2.bit 16b420P16_D_Sony_2.bit 16b420P16_E_Sony_2.bit 16b422P16_A_Sony_2.bit 16b422P16_B_Sony_2.bit 16b422P16_C_Sony_2.bit 16b422P16_D_Sony_2.bit 16b422P16_E_Sony_2.bit 16b444Iepp_A_Sharp_3.bit 16b444Ierrc_A_Qualcomm_2.bit 16b444Ietsrc_A_Kwai_2.bit 16b444Iprrc_A_Qualcomm_2.bit 16b444Irlscp_A_OPPO_2.bit 16b444Ivvc1_A_Alibaba_2.bit 16b444Iwpp_A_OPPO_1.bit 16b444SPepp_A_Sharp_3.bit 16b444SPerrc_A_Qualcomm_2.bit 16b444SPetsrc_A_Kwai_2.bit 16b444SPetsrc_B_Kwai_2.bit 16b444SPetsrc_C_Kwai_2.bit 16b444SPetsrc_D_Kwai_2.bit 16b444SPetsrc_E_Kwai_2.bit 16b444SPetsrc_F_Kwai_2.bit 16b444SPetsrc_G_Kwai_2.bit 16b444SPetsrc_H_Kwai_2.bit 16b444SPprrc_A_Qualcomm_2.bit 16b444SPrlscp_A_OPPO_2.bit 16b444SPvvc1_A_Alibaba_2.bit 16b444SPwpp_A_OPPO_1.bit 16b444epp_A_Sharp_3.bit 16b444errc_A_Qualcomm_2.bit 16b444errc_B_Qualcomm_2.bit 16b444errc_C_Qualcomm_2.bit 16b444etsrc_A_Kwai_2.bit 
16b444prrc_A_Qualcomm_2.bit 16b444rlscp_A_OPPO_2.bit 16b444vvc1_A_Alibaba_2.bit 16b444wpp_A_OPPO_1.bit Signed-off-by: Wu Jianhua --- Changelog | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog b/Changelog index 4217449438..e236df2c31 100644 --- a/Changelog +++ b/Changelog @@ -18,6 +18,7 @@ version : - APV encoding support through a libopenapv wrapper - VVC decoder supports all content of SCC (Screen Content Coding): IBC (Inter Block Copy), Palette Mode and ACT (Adaptive Color Transform +- The VVC decoder supports decoding 14/16 bits bitstreams version 7.1: From b24f1001a4328b4e5b0e64ee98686f8cb5d9d06a Mon Sep 17 00:00:00 2001 From: Nuo Mi Date: Wed, 7 Feb 2024 17:01:51 +0800 Subject: [PATCH 31/31] add github workflow --- .github/workflows/makefile.yml | 111 +++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .github/workflows/makefile.yml diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml new file mode 100644 index 0000000000..0bb7e33251 --- /dev/null +++ b/.github/workflows/makefile.yml @@ -0,0 +1,111 @@ +name: test +run-name: ${{ github.workflow }} - ${{ github.sha }} +on: + push: + branches: [ main, up ] + pull_request: + branches: [ main, up ] + workflow_dispatch: + + +jobs: + ffvvc-test: + name: ffvvc-test / ${{ matrix.os.name }}/${{ matrix.compiler.name }}/${{ matrix.assembler.name }} + env: + configure_flags: --enable-ffmpeg --disable-everything --enable-decoder=vvc --enable-parser=vvc --enable-demuxer=vvc,mpegts --enable-protocol=file,pipe --enable-encoder=rawvideo,wrapped_avframe --enable-muxer=rawvideo,md5,null + strategy: + fail-fast: false + matrix: + os: + - { name: linux, runner: ubuntu-latest, shell: bash, runner_threads: 4 } + - { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 } + compiler: + - { name: gcc, flags: --cc=gcc } + - { name: clang, flags: --cc=clang } + - { name: msvc, flags: --toolchain=msvc } + - { name: clang-usan, flags: 
'--toolchain=clang-usan' } + - { name: clang-asan, flags: '--toolchain=clang-asan' } + assembler: + - { name: no asm, flags: --disable-asm } + - { name: nasm, flags: --as=nasm } + exclude: + # GitHub's Actions runners do not support AVX2. + - os: { name: linux, runner: ubuntu-latest, shell: bash, runner_threads: 4} + compiler: { name: msvc, flags: --toolchain=msvc } + - os: { name: linux, runner: ubuntu-latest, shell: bash, runner_threads: 4 } + assembler: { name: nasm, flags: --as=nasm } + # Address sanitizer cannot be run with handwritten assembly. + - compiler: { name: clang-asan, flags: '--toolchain=clang-asan' } + assembler: { name: nasm, flags: --as=nasm } + # Windows only supports MSVC. + - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 } + compiler: { name: gcc, flags: --cc=gcc } + - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 } + compiler: { name: clang, flags: --cc=clang } + - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 } + compiler: { name: clang-usan, flags: '--toolchain=clang-usan' } + - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 } + compiler: { name: clang-asan, flags: '--toolchain=clang-asan' } + + runs-on: ${{ matrix.os.runner }} + defaults: + run: + shell: ${{ matrix.os.shell }} + + steps: + - name: Get MSVC + if: ${{ matrix.compiler.name == 'msvc' && matrix.os.name == 'windows' }} + uses: ilammy/msvc-dev-cmd@v1 + + - name: Set up MSYS2 + if: ${{ matrix.os.shell == 'msys2 {0}' }} + uses: msys2/setup-msys2@v2 + with: + release: false + msystem: UCRT64 + path-type: inherit + install: >- + make + diffutils + + - name: Setup python package + run: python3 -m pip install tqdm pyyaml + + - name: Get assembler + if: ${{ matrix.os.shell == 'msys2 {0}' && matrix.assembler.name != 'no asm' }} + run: pacman --noconfirm -S ${{ matrix.assembler.name }} + + - name: Get source + uses: actions/checkout@v3 + with: 
+ path: FFmpeg + + - name: Configure + run: cd FFmpeg && ./configure ${{ matrix.compiler.flags }} ${{ matrix.assembler.flags }} ${{ env.configure_flags }} || (tail ffbuild/config.log; false) + + - name: Build + run: cd FFmpeg && make -j8 + + - name: Get tests + uses: actions/checkout@v3 + with: + repository: ffvvc/tests + path: tests + + - name: Unit test + run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg tests/conformance/passed + + - name: DVB 40 frames test + run: python3 tests/tools/ffmpeg.py --threads 1 --ffmpeg-path=./FFmpeg/ffmpeg tests/conformance/dvb/40frames + + - name: Check ASM + run: cd FFmpeg && make checkasm -j && ./tests/checkasm/checkasm + + - name: Negative test + run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg tests/conformance/failed || true + + - name: Check for fuzz regressions + run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg --fuzz tests/fuzz/passed + + - name: Fuzz negative test + run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg --fuzz tests/fuzz/failed || tree