From 6060fe6037a28a0a09e17e065ddb9795196356ce Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:45:23 +0800
Subject: [PATCH 01/31] avcodec/bit_depth_template: add tpixel for intermediate
 pixel type

Motion compensation needs int as its intermediate type for 16-bit formats,
so tpixel is defined as int when BIT_DEPTH > 12 and as int16_t otherwise.

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/bit_depth_template.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
index ca5037148a..483f9cdef1 100644
--- a/libavcodec/bit_depth_template.c
+++ b/libavcodec/bit_depth_template.c
@@ -30,6 +30,7 @@
 #   undef pixel4
 #   undef dctcoef
 #   undef idctin
+#   undef tpixel
 #   undef no_rnd_avg_pixel4
 #   undef rnd_avg_pixel4
 #   undef AV_RN2P
@@ -52,6 +53,11 @@
 #   define pixel2 uint32_t
 #   define pixel4 uint64_t
 #   define dctcoef int32_t
+#if BIT_DEPTH > 12
+#   define tpixel int
+#else
+#   define tpixel int16_t
+#endif
 
 #ifdef IN_IDCT_DEPTH
 #if IN_IDCT_DEPTH == 32
@@ -81,6 +87,7 @@
 #   define pixel4 uint32_t
 #   define dctcoef int16_t
 #   define idctin  int16_t
+#   define tpixel int16_t
 
 #   define no_rnd_avg_pixel4 no_rnd_avg32
 #   define    rnd_avg_pixel4    rnd_avg32

From d6015a16eb1b308a0e7545cc71e769aa32a47829 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:45:30 +0800
Subject: [PATCH 02/31] avcodec/h26x/h2656_inter_template: fix put pixel
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index 864f6c7e7d..8811aa56a5 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -26,16 +26,17 @@
 #define LUMA_EXTRA_BEFORE       3
 #define LUMA_EXTRA              7
 
-static void FUNC(put_pixels)(int16_t *dst,
+static void FUNC(put_pixels)(int16_t *_dst,
     const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
     const pixel *src            = (const pixel *)_src;
+    tpixel *dst                 = (tpixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = src[x] << (14 - BIT_DEPTH);
+            dst[x] = (src[x] << (FFMAX(2, 14 - BIT_DEPTH)));
         src += src_stride;
         dst += MAX_PB_SIZE;
     }
@@ -66,17 +67,13 @@ static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     pixel *dst                  = (pixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
-    const int shift             = denom + 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
-    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
+    const int ox                = _ox * (1 << FFMIN(4, BIT_DEPTH - 8));
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
-            const int v = (src[x] << (14 - BIT_DEPTH));
+            const int v = (src[x] << (FFMAX(2, 14 - BIT_DEPTH)));
             dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
         }
         src += src_stride;

From 6fb787d925a080b184b849a8d7ef039d64435fbd Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:45:37 +0800
Subject: [PATCH 03/31] avcodec/h26x/h2656_inter_template: fix put luma
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index 8811aa56a5..cece87f1ad 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -91,49 +91,55 @@ static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride,
      filter[6] * src[x + 3 * stride] +                                         \
      filter[7] * src[x + 4 * stride])
 
-static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
+static void FUNC(put_luma_h)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
     const pixel *src           = (const pixel*)_src;
+    tpixel *dst                = (tpixel *)_dst;
     const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
     const int8_t *filter       = hf;
+    const int shift            = FFMIN(4, BIT_DEPTH - 8);
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            dst[x] = LUMA_FILTER(src, 1) >> shift;
         src += src_stride;
         dst += MAX_PB_SIZE;
     }
 }
 
-static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
+static void FUNC(put_luma_v)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
     const pixel *src           = (pixel*)_src;
+    tpixel *dst                = (tpixel *)_dst;
     const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
     const int8_t *filter       = vf;
+    const int shift            = FFMIN(4, BIT_DEPTH - 8);
 
     for (int y = 0; y < height; y++)  {
         for (int x = 0; x < width; x++)
-            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
+            dst[x] = LUMA_FILTER(src, src_stride) >> shift;
         src += src_stride;
         dst += MAX_PB_SIZE;
     }
 }
 
-static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
+static void FUNC(put_luma_hv)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
-    int16_t *tmp                = tmp_array;
+    tpixel tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
+    tpixel *tmp                 = tmp_array;
     const pixel *src            = (const pixel*)_src;
+    tpixel *dst                 = (tpixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const int8_t *filter        = hf;
+    const int shift             = FFMIN(4, BIT_DEPTH - 8);
 
     src   -= LUMA_EXTRA_BEFORE * src_stride;
     for (int y = 0; y < height + LUMA_EXTRA; y++) {
         for (int x = 0; x < width; x++)
-            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            tmp[x] = LUMA_FILTER(src, 1) >> shift;
         src += src_stride;
         tmp += MAX_PB_SIZE;
     }

From 06a55571aa3bc6fa79fd74887631d87c07c12e16 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:45:46 +0800
Subject: [PATCH 04/31] avcodec/h26x/h2656_inter_template: fix put chroma
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index cece87f1ad..25f11fc828 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -342,50 +342,56 @@ static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
      filter[2] * src[x + stride] +                                             \
      filter[3] * src[x + 2 * stride])
 
-static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
+static void FUNC(put_chroma_h)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
     const pixel *src            = (const pixel *)_src;
+    tpixel *dst                 = (tpixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const int8_t *filter        = hf;
+    const int shift             = FFMIN(4, BIT_DEPTH - 8);
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            dst[x] = CHROMA_FILTER(src, 1) >> shift;
         src += src_stride;
         dst += MAX_PB_SIZE;
     }
 }
 
-static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
+static void FUNC(put_chroma_v)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
     const pixel *src            = (const pixel *)_src;
+    tpixel *dst                 = (tpixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const int8_t *filter        = vf;
+    const int shift             = FFMIN(4, BIT_DEPTH - 8);
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
+            dst[x] = CHROMA_FILTER(src, src_stride) >> shift;
         src += src_stride;
         dst += MAX_PB_SIZE;
     }
 }
 
-static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
+static void FUNC(put_chroma_hv)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
-    int16_t *tmp                = tmp_array;
+    tpixel tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
+    tpixel *tmp                 = tmp_array;
     const pixel *src            = (const pixel *)_src;
+    tpixel *dst                 = (tpixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const int8_t *filter        = hf;
+    const int shift             = FFMIN(4, BIT_DEPTH - 8);
 
     src -= CHROMA_EXTRA_BEFORE * src_stride;
 
     for (int y = 0; y < height + CHROMA_EXTRA; y++) {
         for (int x = 0; x < width; x++)
-            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            tmp[x] = CHROMA_FILTER(src, 1) >> shift;
         src += src_stride;
         tmp += MAX_PB_SIZE;
     }

From 1707be2a47a25479f8761a32cbe0e9399f45dc30 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:45:54 +0800
Subject: [PATCH 05/31] avcodec/h26x/h2656_inter_template: fix put uni luma
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 28 ++++++++------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index 25f11fc828..49f45b8d62 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -163,16 +163,12 @@ static void FUNC(put_uni_luma_h)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
     const int8_t *filter       = hf;
-    const int shift            = 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift            = FFMAX(2, 14 - BIT_DEPTH);
     const int offset           = 1 << (shift - 1);
-#else
-    const int offset           = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
-            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            const int val = LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8);
             dst[x]        = av_clip_pixel((val + offset) >> shift);
         }
         src   += src_stride;
@@ -190,16 +186,12 @@ static void FUNC(put_uni_luma_v)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = vf;
-    const int shift             = 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
-            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
+            const int val = LUMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8);
             dst[x]        = av_clip_pixel((val + offset) >> shift);
         }
         src   += src_stride;
@@ -211,24 +203,20 @@ static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
-    int16_t *tmp                = tmp_array;
+    tpixel tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
+    tpixel *tmp                = tmp_array;
     const pixel *src            = (const pixel*)_src;
     pixel *dst                  = (pixel *)_dst;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int shift             =  14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    int shift                   = FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     src   -= LUMA_EXTRA_BEFORE * src_stride;
     for (int y = 0; y < height + LUMA_EXTRA; y++) {
         for (int x = 0; x < width; x++)
-            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            tmp[x] = LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8);
         src += src_stride;
         tmp += MAX_PB_SIZE;
     }

From f159c0784fc809d72700297248cf6af7aba66e9c Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:46:05 +0800
Subject: [PATCH 06/31] avcodec/h26x/h2656_inter_template: fix put uni chroma
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 28 ++++++++------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index 49f45b8d62..c5a92037a1 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -404,16 +404,12 @@ static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int shift             = 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
+            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8)) + offset) >> shift);
         src += src_stride;
         dst += dst_stride;
     }
@@ -428,16 +424,12 @@ static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = vf;
-    const int shift             = 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
+            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8)) + offset) >> shift);
         src += src_stride;
         dst += dst_stride;
     }
@@ -447,25 +439,21 @@ static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const int8_t *hf, const int8_t *vf, const int width)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
-    int16_t *tmp                = tmp_array;
+    tpixel tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
+    tpixel *tmp                 = tmp_array;
     const pixel *src            = (const pixel *)_src;
     pixel *dst                  = (pixel *)_dst;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int shift             = 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     src -= CHROMA_EXTRA_BEFORE * src_stride;
 
     for (int y = 0; y < height + CHROMA_EXTRA; y++) {
         for (int x = 0; x < width; x++)
-            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            tmp[x] = CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8);
         src += src_stride;
         tmp += MAX_PB_SIZE;
     }

From 78b47cced3f504c4cf012f2a11a5882f515b2237 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:46:12 +0800
Subject: [PATCH 07/31] avcodec/h26x/h2656_inter_template: fix put uni luma w
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 30 ++++++++------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index c5a92037a1..fee561b8e2 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -245,17 +245,13 @@ static void FUNC(put_uni_luma_w_h)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
-    const int shift             = denom + 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int ox                = _ox * (1 << FFMIN(4, BIT_DEPTH - 8));
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
+            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
         src += src_stride;
         dst += dst_stride;
     }
@@ -271,17 +267,13 @@ static void FUNC(put_uni_luma_w_v)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = vf;
-    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
-    const int shift             = denom + 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int ox                = _ox * (1 << FFMIN(4, BIT_DEPTH - 8));
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
-            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
+            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
         src += src_stride;
         dst += dst_stride;
     }
@@ -298,18 +290,14 @@ static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
-    const int shift             = denom + 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int ox                = _ox * (1 << FFMIN(4, BIT_DEPTH - 8));
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     src   -= LUMA_EXTRA_BEFORE * src_stride;
     for (int y = 0; y < height + LUMA_EXTRA; y++) {
         for (int x = 0; x < width; x++)
-            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            tmp[x] = LUMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8);
         src += src_stride;
         tmp += MAX_PB_SIZE;
     }

From 0bca8ca611ee3c16d463f919ee80daf0882196e6 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:46:20 +0800
Subject: [PATCH 08/31] avcodec/h26x/h2656_inter_template: fix put uni chroma w
 functions for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/h26x/h2656_inter_template.c | 30 ++++++++------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/libavcodec/h26x/h2656_inter_template.c b/libavcodec/h26x/h2656_inter_template.c
index fee561b8e2..506f9a9704 100644
--- a/libavcodec/h26x/h2656_inter_template.c
+++ b/libavcodec/h26x/h2656_inter_template.c
@@ -466,17 +466,13 @@ static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int shift             = denom + 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
-    ox     = ox * (1 << (BIT_DEPTH - 8));
+    ox     = ox * (1 << FFMIN(4, BIT_DEPTH - 8));
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
-            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
+            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
         }
         dst += dst_stride;
         src += src_stride;
@@ -493,17 +489,13 @@ static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = vf;
-    const int shift             = denom + 14 - BIT_DEPTH;
-    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
-#if BIT_DEPTH < 14
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
+    const int ox                = _ox * (1 << FFMIN(4, BIT_DEPTH - 8));
     int offset                  = 1 << (shift - 1);
-#else
-    int offset                  = 0;
-#endif
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
-            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
+            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> FFMIN(4, BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
         }
         dst += dst_stride;
         src += src_stride;
@@ -521,18 +513,14 @@ static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride,
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int8_t *filter        = hf;
-    const int shift             = denom + 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
 
     src -= CHROMA_EXTRA_BEFORE * src_stride;
 
     for (int y = 0; y < height + CHROMA_EXTRA; y++) {
         for (int x = 0; x < width; x++)
-            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
+            tmp[x] = CHROMA_FILTER(src, 1) >> FFMIN(4, BIT_DEPTH - 8);
         src += src_stride;
         tmp += MAX_PB_SIZE;
     }
@@ -540,7 +528,7 @@ static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride,
     tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
     filter = vf;
 
-    ox     = ox * (1 << (BIT_DEPTH - 8));
+    ox     = ox * (1 << FFMIN(4, BIT_DEPTH - 8));
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
             dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);

From a99d07dd0c2a18bbddfcb5d675a27ab75645202c Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:46:50 +0800
Subject: [PATCH 09/31] avcodec/vvc/inter_template: fix avg function for high
 bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index aee4994c17..535f5e1e75 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -183,8 +183,10 @@ static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_st
 #undef TMP_STRIDE
 
 static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
-    const int16_t *src0, const int16_t *src1, const int width, const int height)
+    const int16_t *_src0, const int16_t *_src1, const int width, const int height)
 {
+    const tpixel *src0          = (const tpixel *)_src0;
+    const tpixel *src1          = (const tpixel *)_src1;
     pixel *dst                  = (pixel*)_dst;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int shift             = FFMAX(3, 15 - BIT_DEPTH);

From 848b4602ff948c9fe6ce2d427e8d45a54e381e2c Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:46:58 +0800
Subject: [PATCH 10/31] avcodec/vvc/inter_template: fix w_avg function for high
 bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 535f5e1e75..dbb11a0fba 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -202,9 +202,11 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
 }
 
 static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
-    const int16_t *src0, const int16_t *src1, const int width, const int height,
+    const int16_t *_src0, const int16_t *_src1, const int width, const int height,
     const int denom, const int w0, const int w1, const int o0, const int o1)
 {
+    const tpixel *src0          = (const tpixel *)_src0;
+    const tpixel *src1          = (const tpixel *)_src1;
     pixel *dst                  = (pixel*)_dst;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int shift             = denom + FFMAX(3, 15 - BIT_DEPTH);

From 7b0bb24b877e92922a01990d1ae0e6ea26424d51 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 30 May 2025 02:55:58 +0800
Subject: [PATCH 11/31] avcodec/vvc/inter_template: fix put_scaled function for
 high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index dbb11a0fba..89b09797fa 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -29,10 +29,10 @@ static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _ds
     const int _x, const int _y, const int dx, const int dy,
     const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma)
 {
-    int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
-    int16_t *tmp                 = tmp_array;
-    pixel *dst                   = (pixel*)_dst;
-    int16_t *dst16               = (int16_t*)_dst;
+    tpixel tmp_array[TMP_STRIDE * MAX_PB_SIZE];
+    tpixel *tmp                  = tmp_array;
+    pixel *dst                   = (pixel *)_dst;
+    tpixel *dst16                = (tpixel *)_dst;
     const ptrdiff_t dst_stride   = _dst_stride / sizeof(pixel);
     const ptrdiff_t src_stride   = _src_stride / sizeof(pixel);
     const int shift              = FFMAX(2, 14 - BIT_DEPTH);

From 80370c793e9579632001dc05617ba36a43a26512 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:05 +0800
Subject: [PATCH 12/31] avcodec/vvc/inter_template: fix put_gpm function for
 high bitdepth

This commit fixes the decoding process for geometric partitioning mode inter blocks.

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 89b09797fa..501b49b91f 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -241,12 +241,14 @@ static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride,
 
 static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride,
     const int width, const int height,
-    const int16_t *src0, const int16_t *src1,
+    const int16_t *_src0, const int16_t *_src1,
     const uint8_t *weights, const int step_x, const int step_y)
 {
-    const int shift  = FFMAX(5, 17 - BIT_DEPTH);
-    const int offset = 1 << (shift - 1);
-    pixel *dst       = (pixel *)_dst;
+    const tpixel *src0 = (const tpixel *)_src0;
+    const tpixel *src1 = (const tpixel *)_src1;
+    pixel *dst         = (pixel *)_dst;
+    const int shift    = FFMAX(5, 17 - BIT_DEPTH);
+    const int offset   = 1 << (shift - 1);
 
     dst_stride /= sizeof(pixel);
     for (int y = 0; y < height; y++) {

From 84bb2f6df6f256c2cc58debab10bfc42150fa02b Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:12 +0800
Subject: [PATCH 13/31] avcodec/vvc/inter_template: fix bdof_fetch_samples
 function for high bitdepth

This commit fixes the sample fetching process for bi-directional optical flow prediction.

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 501b49b91f..cf6e287c1c 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -271,25 +271,25 @@ static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const p
     const int y_off             = (y_frac >> 3) - 1;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
     const pixel *src            = (pixel*)_src + (x_off) + y_off * src_stride;
-    int16_t *dst                = _dst - 1 - MAX_PB_SIZE;
-    const int shift             = 14 - BIT_DEPTH;
+    tpixel *dst                 = (tpixel *)_dst - 1 - MAX_PB_SIZE;
+    const int shift             = FFMAX(2, 14 - BIT_DEPTH);
     const int bdof_width        = width + 2 * BDOF_BORDER_EXT;
 
     // top
     for (int i = 0; i < bdof_width; i++)
-        dst[i] = src[i] << shift;
+        dst[i] = (src[i] << shift);
 
     dst += MAX_PB_SIZE;
     src += src_stride;
 
     for (int i = 0; i < height; i++) {
-        dst[0] = src[0] << shift;
-        dst[1 + width] = src[1 + width] << shift;
+        dst[0] = (src[0] << shift);
+        dst[1 + width] = (src[1 + width] << shift);
         dst += MAX_PB_SIZE;
         src += src_stride;
     }
     for (int i = 0; i < bdof_width; i++)
-        dst[i] = src[i] << shift;
+        dst[i] = (src[i] << shift);
 }
 
 //8.5.6.3.3 Luma integer sample fetching process

From 0ab7d5e33dad52743bc88af362f6d2ba98f5d539 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:19 +0800
Subject: [PATCH 14/31] avcodec/vvc/inter_template: fix prof_grad_filter
 function for high bitdepth

Fix the gradient filter of the prediction refinement with optical flow (PROF) process.

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index cf6e287c1c..7c264142bf 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -298,14 +298,13 @@ static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdif
     FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
 }
 
-static void FUNC(prof_grad_filter)(int16_t *gradient_h, int16_t *gradient_v, const ptrdiff_t gradient_stride,
-    const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height)
+static void FUNC(prof_grad_filter)(tpixel *gradient_h, tpixel *gradient_v, const ptrdiff_t gradient_stride,
+    const tpixel *src, const ptrdiff_t src_stride, const int width, const int height)
 {
-    const int shift     = 6;
-    const int16_t *src  = _src;
+    const int shift    = 6;
 
     for (int y = 0; y < height; y++) {
-        const int16_t *p = src;
+        const tpixel *p = src;
         for (int x = 0; x < width; x++) {
             gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift);
             gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift);

From ddf02de339077f8636f787d14839dfc2d13960df Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:28 +0800
Subject: [PATCH 15/31] avcodec/vvc/inter_template: fix apply_prof function for
 high bitdepth

This commit fixed the prediction refinement with optical flow process

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 7c264142bf..694e4c45d8 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -316,12 +316,15 @@ static void FUNC(prof_grad_filter)(tpixel *gradient_h, tpixel *gradient_v, const
     }
 }
 
-static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
+static void FUNC(apply_prof)(int16_t *_dst, const int16_t *_src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
 {
-    const int limit     = (1 << FFMAX(13, BIT_DEPTH + 1));          ///< dILimit
+    const tpixel *src  = (const tpixel *)_src;
+    tpixel *dst        = (tpixel *)_dst;
+    const int limit    = (1 << FFMAX(13, BIT_DEPTH + 1));          ///< dILimit
+
+    tpixel gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
+    tpixel gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
 
-    int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
-    int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
     FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
 
     for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {

From ce3376d3c1a26b14a6361b735cd3818de5c4b6a9 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:35 +0800
Subject: [PATCH 16/31] avcodec/vvc/inter_template: fix apply_prof_uni function
 for high bitdepth

This commit fixed the prediction refinement with optical flow process

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 694e4c45d8..0bddc8f471 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -333,26 +333,23 @@ static void FUNC(apply_prof)(int16_t *_dst, const int16_t *_src, const int16_t *
             const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
             const int val = src[x] + av_clip(di, -limit, limit - 1);
             dst[x] = val;
-
         }
         src += MAX_PB_SIZE;
         dst += MAX_PB_SIZE;
     }
 }
 
-static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
+static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
 {
     const int limit             = (1 << FFMAX(13, BIT_DEPTH + 1));          ///< dILimit
+    const tpixel *src           = (const tpixel *)_src;
     pixel *dst                  = (pixel*)_dst;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
-    const int shift             = 14 - BIT_DEPTH;
-#if BIT_DEPTH < 14
+    const int shift             = FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
-#else
-    const int offset            = 0;
-#endif
-    int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
-    int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
+
+    tpixel gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
+    tpixel gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
 
     FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
 
@@ -362,7 +359,6 @@ static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, con
             const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
             const int val = src[x] + av_clip(di, -limit, limit - 1);
             dst[x] = av_clip_pixel((val + offset) >> shift);
-
         }
         src += MAX_PB_SIZE;
         dst += dst_stride;

From 20f48b02936a84890cd6e9f615196907821338a2 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 30 May 2025 02:20:36 +0800
Subject: [PATCH 17/31] avcodec/vvc/inter_template: fix apply_prof_uni_w
 function for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 0bddc8f471..64e5fab7f5 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -366,17 +366,18 @@ static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, con
 }
 
 static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride,
-    const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y,
+    const int16_t *_src, const int16_t *diff_mv_x, const int16_t *diff_mv_y,
     const int denom, const int wx, const int _ox)
 {
     const int limit             = (1 << FFMAX(13, BIT_DEPTH + 1));          ///< dILimit
+    const tpixel *src           = (const tpixel *)_src;
     pixel *dst                  = (pixel*)_dst;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int shift             = denom + FFMAX(2, 14 - BIT_DEPTH);
     const int offset            = 1 << (shift - 1);
     const int ox                = _ox * (1 << (BIT_DEPTH - 8));
-    int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
-    int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
+    tpixel gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
+    tpixel gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
 
     FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
 

From 71a4db03990f53a00a54b4776433c8988688a169 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:43 +0800
Subject: [PATCH 18/31] avcodec/vvc/inter_template: fix apply_bdof function for
 high bitdepth

This commit fixed the Bi-directional optical flow prediction process

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter_template.c | 57 +++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index 64e5fab7f5..45f3640d95 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -393,9 +393,9 @@ static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     }
 }
 
-static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
+static void FUNC(derive_bdof_vx_vy)(const tpixel *_src0, const tpixel *_src1,
     const int pad_left, const int pad_top, const int pad_right, const int pad_bottom,
-    const int16_t **gradient_h, const int16_t **gradient_v,
+    const tpixel **gradient_h, const tpixel **gradient_v,
     int* vx, int* vy)
 {
     const int shift2 = 4;
@@ -403,17 +403,22 @@ static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
     const int thres = 1 << 4;
     int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0;
 
+    const tpixel *gh0 = gradient_h[0];
+    const tpixel *gv0 = gradient_v[0];
+    const tpixel *gh1 = gradient_h[1];
+    const tpixel *gv1 = gradient_v[1];
+
     for (int y = -1; y < BDOF_MIN_BLOCK_SIZE + 1; y++) {
-        const int dy        = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE);         // we pad for the first and last row
-        const int16_t *src0 = _src0 + dy * MAX_PB_SIZE;
-        const int16_t *src1 = _src1 + dy * MAX_PB_SIZE;
+        const int dy       = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE);         // we pad for the first and last row
+        const tpixel *src0 = _src0 + dy * MAX_PB_SIZE;
+        const tpixel *src1 = _src1 + dy * MAX_PB_SIZE;
 
         for (int x = -1; x < BDOF_MIN_BLOCK_SIZE + 1; x++) {
             const int dx    = x + (pad_left && x < 0) - (pad_right && x == BDOF_MIN_BLOCK_SIZE);         // we pad for the first and last col
             const int diff  = (src0[dx] >> shift2) - (src1[dx] >> shift2);
             const int idx   = BDOF_BLOCK_SIZE * dy + dx;
-            const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3;
-            const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3;
+            const int temph = (gh0[idx] + gh1[idx]) >> shift3;
+            const int tempv = (gv0[idx] + gv1[idx]) >> shift3;
 
             sgx2 += FFABS(temph);
             sgy2 += FFABS(tempv);
@@ -426,11 +431,11 @@ static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
     *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0;
 }
 
-static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1,
-    const int16_t **gh, const int16_t **gv, const int vx, const int vy)
+static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const tpixel *src0, const tpixel *src1,
+    const tpixel **gh, const tpixel **gv, const int vx, const int vy)
 {
-    const int shift4 = 15 - BIT_DEPTH;
-    const int offset4 = 1 << (shift4 - 1);
+    const int shift4   = FFMAX(3, 15 - BIT_DEPTH);
+    const int offset4  = 1 << (shift4 - 1);
 
     for (int y = 0; y < BDOF_MIN_BLOCK_SIZE; y++) {
         for (int x = 0; x < BDOF_MIN_BLOCK_SIZE; x++) {
@@ -447,27 +452,31 @@ static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, c
 static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src0, const int16_t *_src1,
     const int block_w, const int block_h)
 {
-    int16_t gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
-    int16_t gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
+    tpixel gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
+    tpixel gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
     int vx, vy;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
-    pixel* dst                  = (pixel*)_dst;
+    const tpixel *src0          = (const tpixel *)_src0;
+    const tpixel *src1          = (const tpixel *)_src1;
+    pixel *dst                  = (pixel *)_dst;
 
     FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_BLOCK_SIZE,
-        _src0, MAX_PB_SIZE, block_w, block_h);
+        src0, MAX_PB_SIZE, block_w, block_h);
     FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_BLOCK_SIZE,
-        _src1, MAX_PB_SIZE, block_w, block_h);
+        src1, MAX_PB_SIZE, block_w, block_h);
 
     for (int y = 0; y < block_h; y += BDOF_MIN_BLOCK_SIZE) {
         for (int x = 0; x < block_w; x += BDOF_MIN_BLOCK_SIZE) {
-            const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x;
-            const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x;
-            pixel *d            = dst + x;
-            const int idx       = BDOF_BLOCK_SIZE * y  + x;
-            const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx };
-            const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx };
-            FUNC(derive_bdof_vx_vy)(src0, src1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy);
-            FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy);
+            const tpixel *s0   = src0 + y * MAX_PB_SIZE + x;
+            const tpixel *s1   = src1 + y * MAX_PB_SIZE + x;
+
+            pixel *d           = dst + x;
+            const int idx      = BDOF_BLOCK_SIZE * y + x;
+            const tpixel *gh[] = { gradient_h[0] + idx, gradient_h[1] + idx };
+            const tpixel *gv[] = { gradient_v[0] + idx, gradient_v[1] + idx };
+
+            FUNC(derive_bdof_vx_vy)(s0, s1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy);
+            FUNC(apply_bdof_min_block)(d, dst_stride, s0, s1, gh, gv, vx, vy);
         }
         dst += BDOF_MIN_BLOCK_SIZE * dst_stride;
     }

From c6c326eaec6c9c6c898ba16f2dfde8f9b1092877 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:46:28 +0800
Subject: [PATCH 19/31] avcodec/vvc/ctu: double the temporary buffer sizes for
 high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/ctu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index e37bacf9dd..b63fd3e65b 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -391,9 +391,9 @@ typedef struct VVCLocalContext {
 
     /* *2 for high bit depths */
     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2];
-    DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE];
-    DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE];
-    DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE];
+    DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
+    DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
+    DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
     DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
     DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
     DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];

From 6e1863124ea8c2843f52dd96e2d882fbe03af80b Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:49 +0800
Subject: [PATCH 20/31] avcodec/vvc/inter: multiply the memory size of
 temporary buffer by 2 for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/inter.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 64a9dd1e46..be8b9c8fe6 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -30,6 +30,8 @@
 #define PROF_TEMP_OFFSET (MAX_PB_SIZE + 32)
 static const int bcw_w_lut[] = {4, 5, 3, 10, -2};
 
+#define HIGHBD_MULTIPLIER(bd) (((bd) + 2) >> 3) /* 1 for bit depths 8..13, 2 for 14..16 */
+
 static void subpic_get_rect(VVCRect *r, const VVCFrame *src_frame, const int subpic_idx, const int is_chroma)
 {
     const VVCSPS *sps = src_frame->sps;
@@ -319,8 +321,9 @@ static void mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
     const int hs              = fc->ps.sps->hshift[c_idx];
     const int vs              = fc->ps.sps->vshift[c_idx];
     const int idx             = av_log2(block_w) - 1;
+    const int mult            = HIGHBD_MULTIPLIER(fc->ps.sps->bit_depth);
     const VVCFrame *refs[]    = { ref0, ref1 };
-    int16_t *tmp[]            = { lc->tmp + sb_bdof_flag * PROF_TEMP_OFFSET, lc->tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET };
+    int16_t *tmp[]            = { lc->tmp + sb_bdof_flag * PROF_TEMP_OFFSET * mult, lc->tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET * mult };
     int denom, w0, w1, o0, o1;
     const int weight_flag     = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, pu->dmvr_flag);
     const int is_chroma       = !!c_idx;
@@ -497,7 +500,7 @@ static void luma_prof_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst
     const VVCFrameContext *fc = lc->fc;
     const uint8_t *src        = ref->frame->data[LUMA];
     ptrdiff_t src_stride      = ref->frame->linesize[LUMA];
-    uint16_t *prof_tmp        = lc->tmp + PROF_TEMP_OFFSET;
+    int16_t *prof_tmp         = lc->tmp + PROF_TEMP_OFFSET * HIGHBD_MULTIPLIER(fc->ps.sps->bit_depth);
     const int idx             = av_log2(block_w) - 1;
     const int lx              = mvf->pred_flag - PF_L0;
     const Mv *mv              = mvf->mv + lx;
@@ -540,7 +543,7 @@ static void luma_prof(VVCLocalContext *lc, int16_t *dst, const VVCFrame *ref,
     const int oy              = y_off + (mv->y >> 4);
     const int idx             = av_log2(block_w) - 1;
     const int is_chroma       = 0;
-    uint16_t *prof_tmp        = lc->tmp2 + PROF_TEMP_OFFSET;
+    int16_t *prof_tmp         = lc->tmp2 + PROF_TEMP_OFFSET * HIGHBD_MULTIPLIER(fc->ps.sps->bit_depth);
     ptrdiff_t src_stride      = ref->frame->linesize[LUMA];
     const uint8_t *src        = ref->frame->data[LUMA];
     const int8_t *hf          = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][mx];

From c6a84a4ddfc9b7fa87b8caf558e248208a09abdb Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:47:55 +0800
Subject: [PATCH 21/31] avcodec/vvc/dsp: add get_clip_from_idx for high
 bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/dsp.h             |  1 +
 libavcodec/vvc/filter.c          | 14 ++------------
 libavcodec/vvc/filter_template.c | 19 +++++++++++++++----
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index ae22900931..44a4b1dc80 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -165,6 +165,7 @@ typedef struct VVCALFDSPContext {
         int vb_pos, int *gradient_tmp);
     void (*recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, const int *class_idx, const int *transpose_idx, int size,
         const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt);
+    void (*get_clip_from_idx)(int16_t *clip, const uint8_t *clip_idx);
 } VVCALFDSPContext;
 
 typedef struct VVCDSPContext {
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 3815668bcf..070de50c69 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -1036,14 +1036,6 @@ static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *sr
     fc->vvcdsp.alf.filter[LUMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos);
 }
 
-static int alf_clip_from_idx(const VVCFrameContext *fc, const int idx)
-{
-    const VVCSPS *sps  = fc->ps.sps;
-    const int offset[] = {0, 3, 5, 7};
-
-    return 1 << (sps->bit_depth - offset[idx]);
-}
-
 static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src,
     const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int c_idx,
     const int width, const int height, const int vb_pos, const ALFParams *alf)
@@ -1053,11 +1045,9 @@ static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *
     const VVCALF *aps             = fc->ps.alf_list[rsh->sh_alf_aps_id_chroma];
     const int idx                 = alf->alf_ctb_filter_alt_idx[c_idx - 1];
     const int16_t *coeff          = aps->chroma_coeff[idx];
-    int16_t clip[ALF_NUM_COEFF_CHROMA];
-
-    for (int i = 0; i < ALF_NUM_COEFF_CHROMA; i++)
-        clip[i] = alf_clip_from_idx(fc, aps->chroma_clip_idx[idx][i]);
+    DECLARE_ALIGNED(4, int16_t, clip)[ALF_NUM_COEFF_CHROMA * 2]; /* accessed as int above 12 bits */
 
+    fc->vvcdsp.alf.get_clip_from_idx(clip, aps->chroma_clip_idx[idx]);
     fc->vvcdsp.alf.filter[CHROMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos);
 }
 
diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c
index 6dd7310089..c9bc7cdb0c 100644
--- a/libavcodec/vvc/filter_template.c
+++ b/libavcodec/vvc/filter_template.c
@@ -397,6 +397,16 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip,
     }
 }
 
+static void FUNC(alf_get_clip_from_idx)(int16_t *_clip, const uint8_t *clip_idx)
+{
+    tpixel *clip = (tpixel *)_clip;
+    const int offset[] = { 0, 3, 5, 7 };
+
+    for (int i = 0; i < ALF_NUM_COEFF_CHROMA; i++) {
+        clip[i] = 1 << (BIT_DEPTH - offset[clip_idx[i]]);
+    }
+}
+
 #undef ALF_DIR_HORZ
 #undef ALF_DIR_VERT
 #undef ALF_DIR_DIGA0
@@ -854,9 +864,10 @@ static void FUNC(ff_vvc_sao_dsp_init)(VVCSAODSPContext *const sao)
 
 static void FUNC(ff_vvc_alf_dsp_init)(VVCALFDSPContext *const alf)
 {
-    alf->filter[LUMA]    = FUNC(alf_filter_luma);
-    alf->filter[CHROMA]  = FUNC(alf_filter_chroma);
-    alf->filter_cc       = FUNC(alf_filter_cc);
-    alf->classify        = FUNC(alf_classify);
+    alf->filter[LUMA]         = FUNC(alf_filter_luma);
+    alf->filter[CHROMA]       = FUNC(alf_filter_chroma);
+    alf->filter_cc            = FUNC(alf_filter_cc);
+    alf->classify             = FUNC(alf_classify);
     alf->recon_coeff_and_clip = FUNC(alf_recon_coeff_and_clip);
+    alf->get_clip_from_idx    = FUNC(alf_get_clip_from_idx);
 }

From d1f8b53e7e92e20a1c1a090241da0784f45b6a9d Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:48:02 +0800
Subject: [PATCH 22/31] avcodec/vvc/filter_template: fix
 alf_recon_coeff_and_clip function for high bitdepth

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/filter.c          | 2 +-
 libavcodec/vvc/filter_template.c | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 070de50c69..75fb8e147e 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -1030,7 +1030,7 @@ static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *sr
     int16_t *clip             = (int16_t *)lc->tmp1;
 
     av_assert0(ALF_MAX_FILTER_SIZE <= sizeof(lc->tmp));
-    av_assert0(ALF_MAX_FILTER_SIZE * sizeof(int16_t) <= sizeof(lc->tmp1));
+    av_assert0(ALF_MAX_FILTER_SIZE * (fc->ps.sps->bit_depth > 12 ? sizeof(int) : sizeof(int16_t)) <= sizeof(lc->tmp1));
 
     alf_get_coeff_and_clip(lc, coeff, clip, src, src_stride, width, height, vb_pos, alf);
     fc->vvcdsp.alf.filter[LUMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos);
diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c
index c9bc7cdb0c..a72d4a2f47 100644
--- a/libavcodec/vvc/filter_template.c
+++ b/libavcodec/vvc/filter_template.c
@@ -370,7 +370,7 @@ static void FUNC(alf_classify)(int *class_idx, int *transpose_idx,
 
 }
 
-static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip,
+static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *_clip,
     const int *class_idx, const int *transpose_idx, const int size,
     const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt)
 {
@@ -381,10 +381,12 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip,
         { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6 },
     };
 
-    const int16_t clip_set[] = {
+    const tpixel clip_set[] = {
         1 << BIT_DEPTH, 1 << (BIT_DEPTH - 3), 1 << (BIT_DEPTH - 5), 1 << (BIT_DEPTH - 7)
     };
 
+    tpixel *clip = (tpixel *)_clip;
+
     for (int i = 0; i < size; i++) {
         const int16_t  *src_coeff = coeff_set + class_to_filt[class_idx[i]] * ALF_NUM_COEFF_LUMA;
         const uint8_t *clip_idx  = clip_idx_set + class_idx[i] * ALF_NUM_COEFF_LUMA;

From b10d9ba2501bca851b6178c51943ef8d2f2e730d Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:48:09 +0800
Subject: [PATCH 23/31] avcodec/vvc/filter_template: fix alf_filter_luma
 function for high bitdepth

This commit fixed Coding tree block filtering process for luma samples

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/filter_template.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c
index a72d4a2f47..708232e083 100644
--- a/libavcodec/vvc/filter_template.c
+++ b/libavcodec/vvc/filter_template.c
@@ -35,15 +35,16 @@ static void FUNC(lmcs_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const in
     }
 }
 
-static av_always_inline int16_t FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, int16_t clip)
+static av_always_inline tpixel FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, tpixel clip)
 {
     return av_clip(v0 - curr, -clip, clip) + av_clip(v1 - curr, -clip, clip);
 }
 
 static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride,
-    const int width, const int height, const int16_t *filter, const int16_t *clip, const int vb_pos)
+    const int width, const int height, const int16_t *filter, const int16_t *_clip, const int vb_pos)
 {
     const pixel *src    = (pixel *)_src;
+    const tpixel *clip  = (const tpixel *)_clip;
     const int shift     = 7;
     const int offset    = 1 << ( shift - 1 );
     const int vb_above  = vb_pos - 4;

From 2f07dce26361594d026b9c505f6692a71c472112 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Tue, 15 Apr 2025 04:43:01 +0800
Subject: [PATCH 24/31] avcodec/vvc/filter_template: fix alf_filter_chroma
 function for high bitdepth

This commit fixed Coding tree block filtering process for chroma samples

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/filter_template.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/libavcodec/vvc/filter_template.c b/libavcodec/vvc/filter_template.c
index 708232e083..3e455c37dd 100644
--- a/libavcodec/vvc/filter_template.c
+++ b/libavcodec/vvc/filter_template.c
@@ -136,13 +136,14 @@ static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uin
 }
 
 static void FUNC(alf_filter_chroma)(uint8_t* _dst, ptrdiff_t dst_stride, const uint8_t* _src, ptrdiff_t src_stride,
-    const int width, const int height, const int16_t* filter, const int16_t* clip, const int vb_pos)
+    const int width, const int height, const int16_t *filter, const int16_t *_clip, const int vb_pos)
 {
-    const pixel *src = (pixel *)_src;
-    const int shift  = 7;
-    const int offset = 1 << ( shift - 1 );
-    const int vb_above  = vb_pos - 2;
-    const int vb_below  = vb_pos + 1;
+    const pixel *src   = (pixel *)_src;
+    const tpixel *clip = (const tpixel *)_clip;
+    const int shift    = 7;
+    const int offset   = 1 << ( shift - 1 );
+    const int vb_above = vb_pos - 2;
+    const int vb_below = vb_pos + 1;
 
     dst_stride /= sizeof(pixel);
     src_stride /= sizeof(pixel);

From d1a3fcbf44bd3c74a00f381049ba2cbc1ee4d64b Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 30 May 2025 02:50:34 +0800
Subject: [PATCH 25/31] avcodec/vvc/dsp_template: fix warning about conversion
 from size_t to const int

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/dsp_template.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vvc/dsp_template.c b/libavcodec/vvc/dsp_template.c
index 13bd8cd4a1..c61ddec497 100644
--- a/libavcodec/vvc/dsp_template.c
+++ b/libavcodec/vvc/dsp_template.c
@@ -32,9 +32,8 @@
 static void FUNC(add_residual)(uint8_t *_dst, const int *res,
     const int w, const int h, const ptrdiff_t _stride)
 {
-    pixel *dst          = (pixel *)_dst;
-
-    const int stride    = _stride / sizeof(pixel);
+    pixel *dst             = (pixel *)_dst;
+    const ptrdiff_t stride = _stride / sizeof(pixel);
 
     for (int y = 0; y < h; y++) {
         for (int x = 0; x < w; x++) {

From ce17c0ddfd42710b036a59174a45b22ad5f63cb0 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 2 May 2025 20:27:29 +0800
Subject: [PATCH 26/31] avcodec/vvc/dsp: init dsp template for 14 bits

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/dsp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c
index af392f2754..51aa1cd6e2 100644
--- a/libavcodec/vvc/dsp.c
+++ b/libavcodec/vvc/dsp.c
@@ -83,6 +83,10 @@ typedef struct IntraEdgeParams {
 #include "dsp_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 14
+#include "dsp_template.c"
+#undef BIT_DEPTH
+
 void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
 {
 #undef FUNC
@@ -98,6 +102,9 @@ void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
     FUNC(ff_vvc_alf_dsp_init, depth)(&vvcdsp->alf);                             \
 
     switch (bit_depth) {
+    case 14:
+        VVC_DSP(14);
+        break;
     case 12:
         VVC_DSP(12);
         break;

From 376d1fcede6ab07e4a61ba73445375c509cd3628 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 2 May 2025 20:28:47 +0800
Subject: [PATCH 27/31] avcodec/vvc/dsp: init dsp template for 16 bits

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/dsp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c
index 51aa1cd6e2..62810c9222 100644
--- a/libavcodec/vvc/dsp.c
+++ b/libavcodec/vvc/dsp.c
@@ -87,6 +87,10 @@ typedef struct IntraEdgeParams {
 #include "dsp_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 16
+#include "dsp_template.c"
+#undef BIT_DEPTH
+
 void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
 {
 #undef FUNC
@@ -102,6 +106,9 @@ void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
     FUNC(ff_vvc_alf_dsp_init, depth)(&vvcdsp->alf);                             \
 
     switch (bit_depth) {
+    case 16:
+        VVC_DSP(16);
+        break;
     case 14:
         VVC_DSP(14);
         break;

From 7a12c3a1a96dbafa0366df2d85623df782e7ec3a Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 2 May 2025 20:25:13 +0800
Subject: [PATCH 28/31] avcodec/vvc/ps: support 14 bits pixel formats

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/ps.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c
index d9f46b219a..9258dea9e5 100644
--- a/libavcodec/vvc/ps.c
+++ b/libavcodec/vvc/ps.c
@@ -32,6 +32,13 @@
 #include "ps.h"
 #include "dec.h"
 
+static enum AVPixelFormat chroma_idc_formats[][4] = {
+    {  AV_PIX_FMT_GRAY8,   AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV422P,    AV_PIX_FMT_YUV444P },
+    { AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10 },
+    { AV_PIX_FMT_GRAY12, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12 },
+    { AV_PIX_FMT_GRAY14, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14 },
+};
+
 static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx)
 {
     const H266RawSPS *r = sps->r;
@@ -39,26 +46,16 @@ static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx)
 
     switch (sps->bit_depth) {
     case 8:
-        if (r->sps_chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY8;
-        if (r->sps_chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P;
-        if (r->sps_chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P;
-        if (r->sps_chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P;
-       break;
     case 10:
-        if (r->sps_chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY10;
-        if (r->sps_chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P10;
-        if (r->sps_chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P10;
-        if (r->sps_chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P10;
-        break;
     case 12:
-        if (r->sps_chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY12;
-        if (r->sps_chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P12;
-        if (r->sps_chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P12;
-        if (r->sps_chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P12;
+    case 14:
+        enum AVPixelFormat *formats = chroma_idc_formats[sps->r->sps_bitdepth_minus8 >> 1];
+        sps->pix_fmt = formats[sps->r->sps_chroma_format_idc];
         break;
+
     default:
         av_log(log_ctx, AV_LOG_ERROR,
-               "The following bit-depths are currently specified: 8, 10, 12 bits, "
+               "The following bit-depths are currently specified: 8, 10, 12, 14 bits, "
                "chroma_format_idc is %d, depth is %d\n",
                r->sps_chroma_format_idc, sps->bit_depth);
         return AVERROR_INVALIDDATA;

From f84a9b13be3fd39901eaf32fe91ec815c96d96d3 Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 2 May 2025 20:25:49 +0800
Subject: [PATCH 29/31] avcodec/vvc/ps: support 16 bits pixel formats

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/ps.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c
index 9258dea9e5..c6f73594c0 100644
--- a/libavcodec/vvc/ps.c
+++ b/libavcodec/vvc/ps.c
@@ -37,6 +37,7 @@ static enum AVPixelFormat chroma_idc_formats[][4] = {
     { AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10 },
     { AV_PIX_FMT_GRAY12, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12 },
     { AV_PIX_FMT_GRAY14, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14 },
+    { AV_PIX_FMT_GRAY16, AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16 },
 };
 
 static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx)
@@ -49,13 +50,14 @@ static int sps_map_pixel_format(VVCSPS *sps, void *log_ctx)
     case 10:
     case 12:
     case 14:
-        enum AVPixelFormat *formats = chroma_idc_formats[sps->r->sps_bitdepth_minus8 >> 1];
-        sps->pix_fmt = formats[sps->r->sps_chroma_format_idc];
+    case 16:
+        /* Index the table directly: a declaration may not follow a case label before C23. */
+        sps->pix_fmt = chroma_idc_formats[r->sps_bitdepth_minus8 >> 1][r->sps_chroma_format_idc];
         break;
 
     default:
         av_log(log_ctx, AV_LOG_ERROR,
-               "The following bit-depths are currently specified: 8, 10, 12, 14 bits, "
+               "The following bit-depths are currently specified: 8, 10, 12, 14, 16 bits, "
                "chroma_format_idc is %d, depth is %d\n",
                r->sps_chroma_format_idc, sps->bit_depth);
         return AVERROR_INVALIDDATA;

From 888d04cd98e91f4d001ef6d8ea79b41933b971da Mon Sep 17 00:00:00 2001
From: Wu Jianhua <toqsxw@outlook.com>
Date: Fri, 30 May 2025 00:39:07 +0800
Subject: [PATCH 30/31] Changelog: the VVC decoder supports decoding 14/16 bits
 bitstreams

passed files:
    16b400P16_A_Sony_2.bit
    16b400P16_B_Sony_2.bit
    16b400P16_C_Sony_2.bit
    16b400P16_D_Sony_2.bit
    16b400P16_E_Sony_2.bit
    16b420P16_A_Sony_2.bit
    16b420P16_B_Sony_2.bit
    16b420P16_C_Sony_2.bit
    16b420P16_D_Sony_2.bit
    16b420P16_E_Sony_2.bit
    16b422P16_A_Sony_2.bit
    16b422P16_B_Sony_2.bit
    16b422P16_C_Sony_2.bit
    16b422P16_D_Sony_2.bit
    16b422P16_E_Sony_2.bit
    16b444Iepp_A_Sharp_3.bit
    16b444Ierrc_A_Qualcomm_2.bit
    16b444Ietsrc_A_Kwai_2.bit
    16b444Iprrc_A_Qualcomm_2.bit
    16b444Irlscp_A_OPPO_2.bit
    16b444Ivvc1_A_Alibaba_2.bit
    16b444Iwpp_A_OPPO_1.bit
    16b444SPepp_A_Sharp_3.bit
    16b444SPerrc_A_Qualcomm_2.bit
    16b444SPetsrc_A_Kwai_2.bit
    16b444SPetsrc_B_Kwai_2.bit
    16b444SPetsrc_C_Kwai_2.bit
    16b444SPetsrc_D_Kwai_2.bit
    16b444SPetsrc_E_Kwai_2.bit
    16b444SPetsrc_F_Kwai_2.bit
    16b444SPetsrc_G_Kwai_2.bit
    16b444SPetsrc_H_Kwai_2.bit
    16b444SPprrc_A_Qualcomm_2.bit
    16b444SPrlscp_A_OPPO_2.bit
    16b444SPvvc1_A_Alibaba_2.bit
    16b444SPwpp_A_OPPO_1.bit
    16b444epp_A_Sharp_3.bit
    16b444errc_A_Qualcomm_2.bit
    16b444errc_B_Qualcomm_2.bit
    16b444errc_C_Qualcomm_2.bit
    16b444etsrc_A_Kwai_2.bit
    16b444prrc_A_Qualcomm_2.bit
    16b444rlscp_A_OPPO_2.bit
    16b444vvc1_A_Alibaba_2.bit
    16b444wpp_A_OPPO_1.bit

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
---
 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index 4217449438..e236df2c31 100644
--- a/Changelog
+++ b/Changelog
@@ -18,6 +18,7 @@ version <next>:
 - APV encoding support through a libopenapv wrapper
 - VVC decoder supports all content of SCC (Screen Content Coding):
   IBC (Inter Block Copy), Palette Mode and ACT (Adaptive Color Transform
+- VVC decoder supports decoding 14-bit and 16-bit bitstreams
 
 
 version 7.1:

From b24f1001a4328b4e5b0e64ee98686f8cb5d9d06a Mon Sep 17 00:00:00 2001
From: Nuo Mi <nuomi2021@gmail.com>
Date: Wed, 7 Feb 2024 17:01:51 +0800
Subject: [PATCH 31/31] add github workflow

---
 .github/workflows/makefile.yml | 111 +++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 .github/workflows/makefile.yml

diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml
new file mode 100644
index 0000000000..0bb7e33251
--- /dev/null
+++ b/.github/workflows/makefile.yml
@@ -0,0 +1,111 @@
+name: test
+run-name: ${{ github.workflow }} - ${{ github.sha }}
+on:
+  push:
+    branches: [ main, up ]
+  pull_request:
+    branches: [ main, up ]
+  workflow_dispatch:
+
+
+jobs:
+  ffvvc-test:
+    name: ffvvc-test / ${{ matrix.os.name }}/${{ matrix.compiler.name }}/${{ matrix.assembler.name }}
+    env:
+      configure_flags: --enable-ffmpeg --disable-everything --enable-decoder=vvc --enable-parser=vvc --enable-demuxer=vvc,mpegts --enable-protocol=file,pipe --enable-encoder=rawvideo,wrapped_avframe --enable-muxer=rawvideo,md5,null
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - { name: linux, runner: ubuntu-latest, shell: bash, runner_threads: 4 }
+          - { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 }
+        compiler:
+          - { name: gcc, flags: --cc=gcc }
+          - { name: clang, flags: --cc=clang }
+          - { name: msvc, flags: --toolchain=msvc }
+          - { name: clang-usan, flags: '--toolchain=clang-usan' }
+          - { name: clang-asan, flags: '--toolchain=clang-asan' }
+        assembler:
+          - { name: no asm, flags: --disable-asm }
+          - { name: nasm, flags: --as=nasm }
+        exclude:
+          # MSVC cannot be used on the Linux runner; GitHub's Actions runners do not support AVX2, so skip nasm on Linux.
+          - os: { name: linux, runner: ubuntu-latest, shell: bash, runner_threads: 4}
+            compiler: { name: msvc, flags: --toolchain=msvc }
+          - os: { name: linux, runner: ubuntu-latest, shell: bash, runner_threads: 4 }
+            assembler: { name: nasm, flags: --as=nasm }
+          # Address sanitizer cannot be run with handwritten assembly.
+          - compiler: { name: clang-asan, flags: '--toolchain=clang-asan' }
+            assembler: { name: nasm, flags: --as=nasm }
+          # Windows only supports MSVC.
+          - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 }
+            compiler: { name: gcc, flags: --cc=gcc }
+          - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 }
+            compiler: { name: clang, flags: --cc=clang }
+          - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 }
+            compiler: { name: clang-usan, flags: '--toolchain=clang-usan' }
+          - os: { name: windows, runner: windows-latest, shell: 'msys2 {0}', runner_threads: 1 }
+            compiler: { name: clang-asan, flags: '--toolchain=clang-asan' }
+
+    runs-on: ${{ matrix.os.runner }}
+    defaults:
+      run:
+        shell: ${{ matrix.os.shell }}
+
+    steps:
+    - name: Get MSVC
+      if: ${{ matrix.compiler.name == 'msvc' && matrix.os.name == 'windows' }}
+      uses: ilammy/msvc-dev-cmd@v1
+
+    - name: Set up MSYS2
+      if: ${{ matrix.os.shell == 'msys2 {0}' }}
+      uses: msys2/setup-msys2@v2
+      with:
+        release: false
+        msystem: UCRT64
+        path-type: inherit
+        install: >-
+          make
+          diffutils
+
+    - name: Setup python package
+      run: python3 -m pip install tqdm pyyaml
+
+    - name: Get assembler
+      if: ${{ matrix.os.shell == 'msys2 {0}' && matrix.assembler.name != 'no asm' }}
+      run: pacman --noconfirm -S ${{ matrix.assembler.name }}
+
+    - name: Get source
+      uses: actions/checkout@v3
+      with:
+        path: FFmpeg
+
+    - name: Configure
+      run: cd FFmpeg && ./configure ${{ matrix.compiler.flags }} ${{ matrix.assembler.flags }} ${{ env.configure_flags }} || (tail ffbuild/config.log; false)
+
+    - name: Build
+      run: cd FFmpeg && make -j8
+
+    - name: Get tests
+      uses: actions/checkout@v3
+      with:
+        repository: ffvvc/tests
+        path: tests
+
+    - name: Unit test
+      run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg tests/conformance/passed
+
+    - name: DVB 40 frames test
+      run: python3 tests/tools/ffmpeg.py --threads 1 --ffmpeg-path=./FFmpeg/ffmpeg tests/conformance/dvb/40frames
+
+    - name: Check ASM
+      run: cd FFmpeg && make checkasm -j && ./tests/checkasm/checkasm
+
+    - name: Negative test
+      run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg tests/conformance/failed || true
+
+    - name: Check for fuzz regressions
+      run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg --fuzz tests/fuzz/passed
+
+    - name: Fuzz negative test  # exit status deliberately ignored, like "Negative test"
+      run: python3 tests/tools/ffmpeg.py --threads ${{ matrix.os.runner_threads }} --ffmpeg-path=./FFmpeg/ffmpeg --fuzz tests/fuzz/failed || true