diff --git a/src/global.h b/src/global.h index 63cd33c2..e7325f8e 100644 --- a/src/global.h +++ b/src/global.h @@ -316,6 +316,12 @@ typedef int32_t mv_t; #define ALIGNED(alignment) __attribute__((aligned (alignment))) #endif +#ifdef _MSC_VER +#define NO_ASAN +#else +#define NO_ASAN __attribute__((no_sanitize("address"))) +#endif + #ifdef _MSC_VER // Buggy VS2010 throws intellisense warnings if void* is not casted. #define MALLOC(type, num) (type *)malloc(sizeof(type) * (num)) diff --git a/src/intra.c b/src/intra.c index 6b3628b1..779bcf63 100644 --- a/src/intra.c +++ b/src/intra.c @@ -36,7 +36,6 @@ #include "image.h" #include "uvg_math.h" -#include "mip_data.h" #include "rdo.h" #include "search.h" #include "search_intra.h" @@ -86,17 +85,6 @@ static const uint8_t num_ref_pixels_left[16][16] = { { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }; - -static void mip_predict( - const encoder_state_t* const state, - const uvg_intra_references* const refs, - const uint16_t pred_block_width, - const uint16_t pred_block_height, - uvg_pixel* dst, - const int mip_mode, - const bool mip_transp); - - int8_t uvg_intra_get_dir_luma_predictor( const uint32_t x, const uint32_t y, @@ -646,298 +634,6 @@ uint8_t uvg_get_mip_flag_context( } -void uvg_mip_boundary_downsampling_1D(int* reduced_dst, const int* const ref_src, int src_len, int dst_len) -{ - if (dst_len < src_len) - { - // Create reduced boundary by downsampling - uint16_t down_smp_factor = src_len / dst_len; - const int log2_factor = uvg_math_floor_log2(down_smp_factor); - const int rounding_offset = (1 << (log2_factor - 1)); - - uint16_t src_idx = 0; - for (uint16_t dst_idx = 0; dst_idx < dst_len; dst_idx++) - { - int sum = 0; - for (int k = 0; k < down_smp_factor; k++) - { - sum += ref_src[src_idx++]; - } - reduced_dst[dst_idx] = (sum + rounding_offset) >> log2_factor; - } - } - else - { - // Copy boundary if no downsampling is needed - for (uint16_t i = 0; i < dst_len; ++i) - { - reduced_dst[i] = ref_src[i]; - } - } -} - - -void uvg_mip_reduced_pred(int* const output, - const int* const input, - const uint8_t* matrix, - const bool transpose, - const int red_bdry_size, - const int red_pred_size, - const int size_id, - const int in_offset, - const int in_offset_tr) -{ - const int input_size = 2 * red_bdry_size; - - // Use local buffer for transposed result - int out_buf_transposed[LCU_WIDTH * LCU_WIDTH]; - int* const out_ptr = transpose ? out_buf_transposed : output; - - int sum = 0; - for (int i = 0; i < input_size; i++) { - sum += input[i]; - } - const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; - assert((input_size == 4 * (input_size >> 2)) && "MIP input size must be divisible by four"); - - const uint8_t* weight = matrix; - const int input_offset = transpose ? in_offset_tr : in_offset; - - const bool red_size = (size_id == 2); - int pos_res = 0; - for (int y = 0; y < red_pred_size; y++) { - for (int x = 0; x < red_pred_size; x++) { - if (red_size) { - weight -= 1; - } - int tmp0 = red_size ? 
0 : (input[0] * weight[0]); - int tmp1 = input[1] * weight[1]; - int tmp2 = input[2] * weight[2]; - int tmp3 = input[3] * weight[3]; - for (int i = 4; i < input_size; i += 4) { - tmp0 += input[i] * weight[i]; - tmp1 += input[i + 1] * weight[i + 1]; - tmp2 += input[i + 2] * weight[i + 2]; - tmp3 += input[i + 3] * weight[i + 3]; - } - out_ptr[pos_res] = CLIP_TO_PIXEL(((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) + input_offset); - pos_res++; - weight += input_size; - } - } - - if (transpose) { - for (int y = 0; y < red_pred_size; y++) { - for (int x = 0; x < red_pred_size; x++) { - output[y * red_pred_size + x] = out_ptr[x * red_pred_size + y]; - } - } - } -} - - -void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int* const boundary, - const uint16_t src_size_ups_dim, const uint16_t src_size_orth_dim, - const uint16_t src_step, const uint16_t src_stride, - const uint16_t dst_step, const uint16_t dst_stride, - const uint16_t boundary_step, - const uint16_t ups_factor) -{ - const int log2_factor = uvg_math_floor_log2(ups_factor); - assert(ups_factor >= 2 && "Upsampling factor must be at least 2."); - const int rounding_offset = 1 << (log2_factor - 1); - - uint16_t idx_orth_dim = 0; - const int* src_line = src; - int* dst_line = dst; - const int* boundary_line = boundary + boundary_step - 1; - while (idx_orth_dim < src_size_orth_dim) - { - uint16_t idx_upsample_dim = 0; - const int* before = boundary_line; - const int* behind = src_line; - int* cur_dst = dst_line; - while (idx_upsample_dim < src_size_ups_dim) - { - uint16_t pos = 1; - int scaled_before = (*before) << log2_factor; - int scaled_behind = 0; - while (pos <= ups_factor) - { - scaled_before -= *before; - scaled_behind += *behind; - *cur_dst = (scaled_before + scaled_behind + rounding_offset) >> log2_factor; - - pos++; - cur_dst += dst_step; - } - - idx_upsample_dim++; - before = behind; - behind += src_step; - } - - idx_orth_dim++; - src_line += src_stride; - dst_line += dst_stride; - boundary_line += boundary_step; - } -} - - - -/** \brief Matrix weighted intra prediction. -*/ -static void mip_predict( - const encoder_state_t* const state, - const uvg_intra_references* const refs, - const uint16_t pred_block_width, - const uint16_t pred_block_height, - uvg_pixel* dst, - const int mip_mode, - const bool mip_transp) -{ - // MIP prediction uses int values instead of uvg_pixel as some temp values may be negative - - uvg_pixel* out = dst; - int result[32*32] = {0}; - const int mode_idx = mip_mode; - - // *** INPUT PREP *** - - // Initialize prediction parameters START - uint16_t width = pred_block_width; - uint16_t height = pred_block_height; - - int size_id; // Prediction block type - if (width == 4 && height == 4) { - size_id = 0; - } - else if (width == 4 || height == 4 || (width == 8 && height == 8)) { - size_id = 1; - } - else { - size_id = 2; - } - - // Reduced boundary and prediction sizes - int red_bdry_size = (size_id == 0) ? 2 : 4; - int red_pred_size = (size_id < 2) ? 
4 : 8; - - // Upsampling factors - uint16_t ups_hor_factor = width / red_pred_size; - uint16_t ups_ver_factor = height / red_pred_size; - - // Upsampling factors must be powers of two - assert(!((ups_hor_factor < 1) || ((ups_hor_factor & (ups_hor_factor - 1))) != 0) && "Horizontal upsampling factor must be power of two."); - assert(!((ups_ver_factor < 1) || ((ups_ver_factor & (ups_ver_factor - 1))) != 0) && "Vertical upsampling factor must be power of two."); - - // Initialize prediction parameters END - - int ref_samples_top[INTRA_REF_LENGTH]; - int ref_samples_left[INTRA_REF_LENGTH]; - - for (int i = 1; i < INTRA_REF_LENGTH; i++) { - ref_samples_top[i-1] = (int)refs->ref.top[i]; // NOTE: in VTM code these are indexed as x + 1 & y + 1 during init - ref_samples_left[i-1] = (int)refs->ref.left[i]; - } - - // Compute reduced boundary with Haar-downsampling - const int input_size = 2 * red_bdry_size; - - int red_bdry[MIP_MAX_INPUT_SIZE]; - int red_bdry_trans[MIP_MAX_INPUT_SIZE]; - - int* const top_reduced = &red_bdry[0]; - int* const left_reduced = &red_bdry[red_bdry_size]; - - uvg_mip_boundary_downsampling_1D(top_reduced, ref_samples_top, width, red_bdry_size); - uvg_mip_boundary_downsampling_1D(left_reduced, ref_samples_left, height, red_bdry_size); - - // Transposed reduced boundaries - int* const left_reduced_trans = &red_bdry_trans[0]; - int* const top_reduced_trans = &red_bdry_trans[red_bdry_size]; - - for (int x = 0; x < red_bdry_size; x++) { - top_reduced_trans[x] = top_reduced[x]; - } - for (int y = 0; y < red_bdry_size; y++) { - left_reduced_trans[y] = left_reduced[y]; - } - - int input_offset = red_bdry[0]; - int input_offset_trans = red_bdry_trans[0]; - - const bool has_first_col = (size_id < 2); - // First column of matrix not needed for large blocks - red_bdry[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset) : 0; - red_bdry_trans[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset_trans) : 0; - - for (int i = 1; i < input_size; ++i) { - red_bdry[i] -= input_offset; - red_bdry_trans[i] -= input_offset_trans; - } - - // *** INPUT PREP *** END - - // *** BLOCK PREDICT *** - - const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1); - const bool transpose = mip_transp; - - const uint8_t* matrix; - switch (size_id) { - case 0: - matrix = &uvg_mip_matrix_4x4[mode_idx][0][0]; - break; - case 1: - matrix = &uvg_mip_matrix_8x8[mode_idx][0][0]; - break; - case 2: - matrix = &uvg_mip_matrix_16x16[mode_idx][0][0]; - break; - default: - assert(false && "Invalid MIP size id."); - } - - // Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8 - int red_pred_buffer[8*8]; - int* const reduced_pred = need_upsampling ? red_pred_buffer : result; - - const int* const reduced_bdry = transpose ? 
red_bdry_trans : red_bdry; - - uvg_mip_reduced_pred(reduced_pred, reduced_bdry, matrix, transpose, red_bdry_size, red_pred_size, size_id, input_offset, input_offset_trans); - if (need_upsampling) { - const int* ver_src = reduced_pred; - uint16_t ver_src_step = width; - - if (ups_hor_factor > 1) { - int* const hor_dst = result + (ups_ver_factor - 1) * width; - ver_src = hor_dst; - ver_src_step *= ups_ver_factor; - - uvg_mip_pred_upsampling_1D(hor_dst, reduced_pred, ref_samples_left, - red_pred_size, red_pred_size, - 1, red_pred_size, 1, ver_src_step, - ups_ver_factor, ups_hor_factor); - } - - if (ups_ver_factor > 1) { - uvg_mip_pred_upsampling_1D(result, ver_src, ref_samples_top, - red_pred_size, width, - ver_src_step, 1, width, 1, - 1, ups_ver_factor); - } - } - - // Assign and cast values from temp array to output - for (int i = 0; i < 32 * 32; i++) { - out[i] = (uvg_pixel)result[i]; - } - // *** BLOCK PREDICT *** END -} - - int8_t uvg_wide_angle_correction( int_fast8_t mode, const int log2_width, @@ -1618,7 +1314,7 @@ void uvg_intra_predict( if (intra_mode < 68) { if (use_mip) { assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]"); - mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed); + uvg_mip_predict(refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed); } else { intra_predict_regular(state, refs, &data->pred_cu, cu_loc, pu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode); @@ -1804,7 +1500,7 @@ static void intra_recon_tb_leaf( uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode); - uvg_pixel pred[32 * 32]; + ALIGNED(32) uvg_pixel pred[32 * 32]; uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu); const int index = lcu_px.x + lcu_px.y * lcu_width; diff --git a/src/mip_data.h b/src/mip_data.h deleted file mode 100644 index 536db0a5..00000000 --- a/src/mip_data.h +++ /dev/null @@ -1,885 +0,0 @@ -/***************************************************************************** - * This file is part of uvg266 VVC encoder. - * - * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, this - * list of conditions and the following disclaimer in the documentation and/or - * other materials provided with the distribution. - * - * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS - ****************************************************************************/ - - /** -* \ingroup Reconstruction -* \file -* MIP weight matrix data. -*/ - -/** \file MipData.h -\brief weight and bias data for matrix-based intra prediction (MIP) -*/ - -#define MIP_SHIFT_MATRIX 6 -#define MIP_OFFSET_MATRIX 32 - -// NOTE: these matrices need to be aligned if used with avx2 -const uint8_t uvg_mip_matrix_4x4[16][16][4] = -{ - { - { 32, 30, 90, 28}, - { 32, 32, 72, 28}, - { 34, 77, 53, 30}, - { 51, 124, 36, 37}, - { 31, 31, 95, 37}, - { 33, 31, 70, 50}, - { 52, 80, 25, 60}, - { 78, 107, 1, 65}, - { 31, 29, 37, 95}, - { 38, 34, 19, 101}, - { 73, 85, 0, 81}, - { 92, 99, 0, 65}, - { 34, 29, 14, 111}, - { 48, 48, 7, 100}, - { 80, 91, 0, 74}, - { 89, 97, 0, 64} - }, - { - { 31, 23, 34, 29}, - { 31, 43, 34, 31}, - { 30, 95, 34, 32}, - { 29, 100, 35, 33}, - { 31, 23, 34, 29}, - { 31, 43, 34, 31}, - { 30, 95, 34, 32}, - { 29, 99, 35, 33}, - { 31, 24, 35, 29}, - { 31, 44, 34, 31}, - { 30, 95, 35, 32}, - { 29, 99, 35, 33}, - { 31, 24, 35, 30}, - { 31, 44, 35, 31}, - { 30, 95, 35, 32}, - { 29, 99, 35, 33} - }, - { - { 32, 32, 36, 58}, - { 32, 29, 26, 66}, - { 36, 37, 23, 61}, - { 79, 84, 3, 37}, - { 32, 32, 30, 69}, - { 33, 29, 24, 71}, - { 44, 16, 21, 70}, - { 96, 18, 0, 57}, - { 32, 31, 24, 74}, - { 33, 30, 23, 71}, - { 36, 24, 24, 71}, - { 59, 9, 16, 68}, - { 32, 32, 23, 75}, - { 33, 30, 24, 70}, - { 32, 30, 25, 71}, - { 36, 26, 25, 70} - }, - { - { 32, 33, 34, 32}, - { 32, 30, 22, 38}, - { 29, 46, 25, 38}, - { 53, 123, 28, 22}, - { 32, 33, 30, 37}, - { 32, 30, 21, 38}, - { 32, 40, 24, 38}, - { 64, 116, 26, 17}, - { 32, 32, 23, 49}, - { 32, 30, 21, 39}, - { 34, 39, 24, 37}, - { 72, 109, 23, 16}, - { 33, 31, 17, 60}, - { 32, 31, 21, 39}, - { 35, 41, 24, 37}, - { 72, 106, 22, 18} - }, - { - { 34, 25, 89, 20}, - { 38, 32, 47, 24}, - { 40, 86, 29, 27}, - { 38, 98, 32, 29}, - { 34, 31, 94, 40}, - { 44, 25, 83, 27}, - { 54, 72, 43, 16}, - { 47, 94, 33, 22}, - { 33, 31, 36, 94}, - { 43, 23, 51, 76}, - { 62, 55, 64, 25}, - { 57, 89, 38, 15}, - { 32, 32, 28, 101}, - { 38, 26, 33, 94}, - { 55, 38, 68, 47}, - { 59, 80, 52, 16} - }, - { - { 28, 30, 68, 29}, - { 23, 48, 23, 48}, - { 39, 98, 16, 42}, - { 84, 86, 20, 17}, - { 25, 31, 52, 74}, - { 38, 68, 5, 70}, - { 95, 78, 7, 21}, - { 127, 54, 12, 0}, - { 30, 47, 14, 107}, - { 79, 76, 0, 53}, - { 127, 59, 7, 1}, - { 127, 51, 9, 0}, - { 50, 71, 1, 96}, - { 109, 69, 7, 25}, - { 127, 56, 9, 0}, - { 123, 53, 13, 0} - }, - { - { 40, 20, 72, 18}, - { 48, 29, 44, 18}, - { 53, 81, 35, 18}, - { 48, 96, 33, 22}, - { 45, 23, 79, 49}, - { 61, 21, 56, 49}, - { 72, 52, 32, 48}, - { 65, 69, 20, 50}, - { 41, 27, 29, 96}, - { 49, 22, 28, 94}, - { 52, 22, 28, 93}, - { 49, 27, 27, 92}, - { 37, 29, 26, 98}, - { 39, 28, 28, 97}, - { 38, 28, 30, 97}, - { 38, 29, 30, 95} - }, - { - { 33, 27, 43, 27}, - { 32, 29, 31, 31}, - { 31, 73, 33, 31}, - { 35, 104, 34, 28}, - { 32, 30, 63, 22}, - { 33, 26, 33, 29}, - { 33, 57, 33, 30}, - { 37, 100, 35, 27}, - { 32, 31, 85, 
25}, - { 34, 25, 39, 25}, - { 35, 39, 32, 28}, - { 40, 91, 35, 25}, - { 32, 30, 77, 50}, - { 34, 26, 54, 22}, - { 37, 31, 34, 27}, - { 45, 75, 34, 23} - }, - { - { 34, 25, 77, 19}, - { 36, 34, 56, 24}, - { 41, 83, 39, 30}, - { 47, 96, 28, 35}, - { 34, 31, 70, 65}, - { 38, 29, 53, 77}, - { 43, 36, 37, 83}, - { 48, 39, 28, 83}, - { 33, 31, 31, 98}, - { 33, 31, 30, 99}, - { 34, 30, 31, 98}, - { 36, 29, 31, 96}, - { 32, 32, 30, 97}, - { 32, 32, 31, 96}, - { 31, 33, 33, 96}, - { 32, 33, 34, 94} - }, - { - { 30, 30, 93, 19}, - { 31, 59, 67, 34}, - { 31, 79, 36, 59}, - { 30, 67, 17, 79}, - { 30, 38, 68, 69}, - { 29, 40, 43, 91}, - { 26, 35, 32, 101}, - { 23, 32, 30, 101}, - { 26, 34, 30, 101}, - { 23, 33, 30, 102}, - { 20, 32, 31, 102}, - { 18, 33, 32, 102}, - { 23, 33, 31, 100}, - { 20, 34, 32, 100}, - { 18, 35, 33, 100}, - { 18, 35, 33, 100} - }, - { - { 31, 54, 90, 26}, - { 32, 60, 53, 61}, - { 34, 49, 37, 84}, - { 34, 39, 35, 89}, - { 35, 38, 41, 88}, - { 35, 35, 32, 96}, - { 35, 31, 33, 96}, - { 35, 32, 35, 94}, - { 34, 34, 30, 97}, - { 35, 32, 33, 95}, - { 35, 32, 34, 94}, - { 35, 34, 34, 93}, - { 34, 34, 34, 93}, - { 35, 34, 34, 93}, - { 35, 34, 34, 92}, - { 36, 34, 35, 91} - }, - { - { 32, 29, 54, 24}, - { 31, 32, 34, 29}, - { 31, 43, 34, 29}, - { 32, 67, 36, 28}, - { 31, 34, 69, 37}, - { 31, 35, 46, 33}, - { 30, 35, 39, 33}, - { 30, 42, 39, 36}, - { 31, 35, 39, 88}, - { 30, 38, 41, 84}, - { 30, 39, 40, 81}, - { 39, 46, 38, 78}, - { 31, 36, 34, 96}, - { 34, 38, 37, 93}, - { 55, 42, 38, 82}, - { 89, 53, 38, 65} - }, - { - { 32, 33, 43, 29}, - { 32, 30, 29, 33}, - { 31, 47, 31, 33}, - { 33, 100, 31, 31}, - { 32, 33, 74, 25}, - { 32, 32, 34, 31}, - { 32, 33, 30, 33}, - { 32, 68, 30, 32}, - { 32, 31, 91, 40}, - { 32, 32, 58, 26}, - { 31, 31, 30, 32}, - { 31, 42, 30, 33}, - { 32, 31, 49, 85}, - { 32, 31, 83, 35}, - { 31, 33, 48, 29}, - { 31, 36, 32, 33} - }, - { - { 31, 29, 81, 35}, - { 32, 28, 34, 50}, - { 31, 75, 16, 43}, - { 34, 103, 29, 32}, - { 32, 32, 53, 78}, - { 31, 28, 36, 88}, - { 30, 52, 18, 73}, - { 52, 88, 17, 35}, - { 32, 32, 35, 94}, - { 30, 31, 35, 95}, - { 36, 29, 31, 92}, - { 100, 43, 16, 40}, - { 32, 32, 35, 93}, - { 30, 32, 38, 93}, - { 55, 18, 37, 83}, - { 127, 0, 30, 40} - }, - { - { 31, 22, 47, 30}, - { 31, 48, 25, 34}, - { 30, 95, 31, 32}, - { 32, 103, 33, 32}, - { 30, 24, 57, 31}, - { 30, 47, 26, 34}, - { 31, 95, 31, 32}, - { 43, 97, 35, 25}, - { 29, 26, 44, 63}, - { 37, 38, 24, 47}, - { 74, 63, 28, 20}, - { 110, 58, 34, 3}, - { 46, 22, 5, 108}, - { 93, 5, 9, 77}, - { 127, 0, 17, 52}, - { 127, 0, 15, 50} - }, - { - { 32, 27, 68, 24}, - { 35, 23, 35, 28}, - { 35, 64, 29, 29}, - { 37, 104, 33, 28}, - { 32, 32, 91, 40}, - { 36, 23, 67, 36}, - { 49, 23, 39, 28}, - { 60, 67, 30, 20}, - { 32, 32, 36, 95}, - { 35, 29, 38, 93}, - { 50, 16, 30, 84}, - { 72, 16, 15, 65}, - { 32, 32, 27, 100}, - { 33, 32, 29, 100}, - { 37, 29, 30, 98}, - { 48, 21, 29, 90} - } -}; - -const uint8_t uvg_mip_matrix_8x8[8][16][8] = -{ - { - { 30, 63, 46, 37, 25, 33, 33, 34}, - { 30, 60, 66, 38, 32, 31, 32, 33}, - { 29, 45, 74, 42, 32, 32, 32, 33}, - { 30, 39, 62, 58, 32, 33, 32, 33}, - { 30, 66, 55, 39, 32, 30, 30, 36}, - { 29, 54, 69, 40, 33, 31, 31, 33}, - { 28, 48, 71, 43, 32, 33, 32, 33}, - { 28, 41, 72, 46, 32, 34, 32, 33}, - { 30, 66, 56, 40, 32, 33, 28, 33}, - { 29, 55, 69, 39, 33, 33, 30, 32}, - { 27, 46, 72, 43, 33, 33, 32, 33}, - { 27, 42, 69, 48, 32, 34, 32, 33}, - { 30, 63, 55, 40, 32, 33, 35, 30}, - { 29, 56, 66, 40, 33, 33, 33, 30}, - { 27, 47, 69, 44, 33, 33, 33, 32}, - { 27, 42, 65, 
50, 32, 34, 32, 33} - }, - { - { 32, 33, 30, 31, 74, 30, 31, 32}, - { 33, 56, 28, 30, 41, 29, 32, 32}, - { 33, 77, 52, 26, 29, 34, 30, 32}, - { 33, 37, 80, 41, 31, 34, 30, 32}, - { 32, 32, 33, 31, 59, 76, 28, 31}, - { 33, 31, 31, 30, 78, 40, 28, 32}, - { 33, 47, 28, 29, 53, 27, 31, 31}, - { 33, 61, 44, 28, 34, 32, 31, 31}, - { 32, 31, 34, 30, 26, 64, 76, 27}, - { 32, 31, 34, 29, 45, 86, 36, 29}, - { 33, 27, 34, 29, 73, 55, 25, 32}, - { 33, 33, 34, 30, 62, 33, 30, 31}, - { 32, 31, 34, 30, 30, 29, 58, 74}, - { 32, 31, 35, 29, 27, 53, 77, 35}, - { 32, 30, 36, 29, 40, 80, 44, 31}, - { 33, 28, 37, 30, 58, 60, 31, 33} - }, - { - { 32, 51, 27, 32, 27, 50, 29, 32}, - { 32, 95, 42, 29, 29, 42, 30, 32}, - { 32, 27, 99, 34, 31, 41, 29, 32}, - { 32, 34, 21, 104, 31, 42, 30, 32}, - { 32, 45, 30, 32, 9, 88, 40, 30}, - { 32, 77, 38, 30, 9, 76, 38, 30}, - { 32, 38, 78, 33, 14, 67, 37, 30}, - { 32, 30, 30, 87, 20, 59, 38, 31}, - { 33, 37, 32, 32, 27, 18, 106, 34}, - { 34, 44, 34, 31, 25, 17, 108, 31}, - { 36, 39, 45, 31, 24, 15, 108, 30}, - { 37, 31, 31, 54, 25, 14, 101, 32}, - { 36, 33, 32, 30, 29, 37, 13, 110}, - { 39, 32, 32, 29, 27, 37, 15, 108}, - { 44, 33, 31, 27, 25, 37, 16, 106}, - { 47, 30, 31, 32, 25, 34, 19, 102} - }, - { - { 32, 48, 35, 35, 47, 68, 31, 31}, - { 32, 33, 59, 40, 27, 71, 33, 30}, - { 32, 29, 47, 65, 24, 62, 37, 30}, - { 33, 33, 31, 81, 26, 50, 42, 32}, - { 32, 30, 40, 38, 30, 70, 55, 31}, - { 32, 20, 46, 50, 26, 55, 64, 31}, - { 33, 30, 29, 66, 25, 41, 72, 33}, - { 36, 34, 27, 69, 26, 31, 67, 39}, - { 33, 28, 36, 40, 30, 26, 85, 47}, - { 36, 27, 33, 50, 31, 20, 79, 53}, - { 43, 30, 26, 57, 28, 17, 67, 62}, - { 51, 27, 28, 55, 22, 23, 49, 70}, - { 38, 29, 32, 39, 28, 30, 22, 104}, - { 51, 31, 28, 43, 24, 31, 17, 102}, - { 69, 23, 30, 40, 15, 38, 10, 95}, - { 77, 13, 35, 38, 8, 43, 8, 90} - }, - { - { 32, 38, 32, 33, 101, 40, 29, 32}, - { 32, 40, 37, 32, 100, 36, 30, 32}, - { 32, 37, 46, 35, 94, 33, 30, 31}, - { 33, 34, 30, 62, 81, 35, 30, 31}, - { 32, 32, 33, 32, 22, 102, 39, 29}, - { 32, 31, 33, 33, 26, 104, 34, 28}, - { 33, 33, 33, 33, 31, 103, 32, 28}, - { 33, 32, 34, 36, 37, 94, 33, 28}, - { 32, 33, 32, 32, 34, 24, 99, 36}, - { 32, 34, 33, 33, 33, 30, 98, 32}, - { 33, 33, 34, 33, 31, 37, 95, 29}, - { 33, 33, 33, 36, 30, 46, 85, 31}, - { 32, 33, 32, 33, 30, 34, 23, 104}, - { 32, 34, 33, 33, 31, 32, 30, 98}, - { 32, 33, 34, 34, 31, 29, 39, 91}, - { 33, 33, 32, 37, 32, 30, 47, 82} - }, - { - { 32, 52, 48, 31, 38, 76, 26, 32}, - { 33, 19, 62, 50, 25, 50, 51, 31}, - { 33, 30, 20, 74, 29, 29, 54, 51}, - { 34, 35, 23, 56, 31, 25, 41, 76}, - { 33, 25, 38, 39, 28, 39, 83, 35}, - { 35, 28, 25, 47, 31, 23, 57, 74}, - { 37, 35, 22, 38, 31, 27, 30, 101}, - { 38, 32, 33, 29, 30, 31, 27, 103}, - { 34, 32, 27, 37, 32, 25, 41, 92}, - { 38, 33, 28, 32, 30, 31, 18, 111}, - { 40, 32, 33, 27, 29, 33, 18, 111}, - { 40, 32, 34, 27, 28, 33, 23, 105}, - { 35, 32, 30, 33, 31, 33, 20, 107}, - { 38, 31, 33, 30, 29, 33, 21, 106}, - { 40, 32, 33, 29, 29, 34, 22, 105}, - { 40, 32, 33, 30, 29, 34, 24, 101} - }, - { - { 32, 28, 31, 33, 92, 33, 30, 31}, - { 33, 30, 28, 33, 71, 26, 32, 30}, - { 33, 60, 26, 33, 47, 28, 33, 30}, - { 33, 63, 44, 36, 37, 31, 33, 30}, - { 33, 30, 31, 33, 43, 90, 33, 29}, - { 33, 28, 29, 34, 71, 71, 26, 30}, - { 33, 30, 26, 33, 86, 45, 28, 30}, - { 33, 38, 29, 32, 74, 32, 33, 29}, - { 33, 32, 30, 32, 29, 41, 95, 27}, - { 34, 31, 29, 33, 26, 71, 73, 22}, - { 34, 31, 29, 33, 37, 88, 46, 25}, - { 33, 32, 28, 34, 55, 75, 36, 28}, - { 34, 31, 30, 32, 33, 27, 43, 89}, - { 35, 32, 28, 33, 
33, 23, 77, 59}, - { 34, 33, 28, 33, 30, 35, 91, 37}, - { 34, 34, 28, 34, 33, 53, 74, 31} - }, - { - { 33, 49, 26, 32, 26, 52, 28, 31}, - { 33, 71, 72, 24, 30, 32, 34, 31}, - { 32, 23, 70, 68, 32, 32, 32, 32}, - { 31, 33, 21, 106, 33, 32, 32, 33}, - { 34, 47, 32, 29, 5, 86, 44, 26}, - { 34, 44, 89, 28, 28, 37, 33, 30}, - { 32, 27, 46, 89, 33, 31, 31, 32}, - { 30, 33, 20, 107, 33, 33, 32, 33}, - { 35, 39, 42, 27, 26, 24, 92, 35}, - { 34, 27, 87, 43, 30, 34, 38, 31}, - { 31, 31, 32, 100, 32, 33, 30, 32}, - { 29, 32, 22, 106, 33, 33, 32, 33}, - { 35, 29, 47, 32, 32, 32, 17, 100}, - { 34, 24, 69, 60, 34, 33, 28, 44}, - { 31, 33, 31, 99, 32, 33, 32, 31}, - { 29, 33, 25, 103, 33, 33, 32, 35} - } -}; - -const uint8_t uvg_mip_matrix_16x16[6][64][7] = -{ - { - { 42, 37, 33, 27, 44, 33, 35}, - { 71, 39, 34, 24, 36, 35, 36}, - { 77, 46, 35, 33, 30, 34, 36}, - { 64, 60, 35, 33, 31, 32, 36}, - { 49, 71, 38, 32, 32, 31, 36}, - { 42, 66, 50, 33, 31, 32, 36}, - { 40, 52, 67, 33, 31, 32, 35}, - { 38, 43, 75, 33, 32, 32, 35}, - { 56, 40, 33, 26, 43, 38, 36}, - { 70, 49, 34, 30, 28, 38, 38}, - { 65, 57, 36, 34, 28, 33, 39}, - { 59, 60, 39, 33, 30, 31, 38}, - { 55, 60, 43, 33, 30, 31, 38}, - { 51, 61, 47, 33, 30, 32, 37}, - { 46, 62, 51, 34, 30, 32, 37}, - { 42, 60, 55, 33, 31, 32, 37}, - { 60, 42, 34, 30, 37, 43, 38}, - { 68, 52, 35, 35, 22, 37, 40}, - { 62, 58, 37, 34, 28, 31, 40}, - { 58, 59, 41, 33, 30, 30, 39}, - { 56, 59, 44, 34, 30, 31, 38}, - { 53, 60, 45, 33, 30, 31, 38}, - { 49, 65, 45, 33, 30, 31, 38}, - { 45, 64, 47, 33, 31, 32, 38}, - { 59, 44, 35, 31, 34, 43, 41}, - { 66, 53, 36, 35, 25, 31, 43}, - { 61, 58, 38, 34, 29, 30, 40}, - { 59, 57, 41, 33, 30, 31, 39}, - { 57, 58, 43, 33, 30, 31, 39}, - { 54, 61, 43, 33, 31, 31, 39}, - { 51, 64, 43, 33, 31, 31, 39}, - { 48, 64, 45, 33, 32, 31, 39}, - { 57, 45, 35, 30, 35, 40, 44}, - { 65, 54, 37, 33, 33, 24, 44}, - { 63, 56, 38, 34, 30, 29, 39}, - { 61, 56, 41, 34, 30, 32, 39}, - { 58, 58, 42, 33, 31, 31, 39}, - { 54, 62, 41, 33, 31, 31, 39}, - { 51, 65, 42, 33, 31, 31, 39}, - { 48, 63, 43, 33, 32, 31, 39}, - { 55, 46, 35, 30, 36, 38, 47}, - { 65, 53, 37, 32, 36, 26, 40}, - { 65, 54, 38, 33, 31, 30, 38}, - { 63, 55, 39, 33, 30, 32, 38}, - { 59, 58, 40, 33, 31, 31, 39}, - { 54, 64, 40, 33, 31, 30, 40}, - { 49, 66, 40, 32, 32, 30, 41}, - { 48, 64, 42, 32, 32, 30, 41}, - { 54, 46, 35, 30, 34, 39, 49}, - { 64, 52, 36, 32, 34, 34, 35}, - { 65, 53, 37, 33, 32, 32, 37}, - { 63, 55, 38, 33, 31, 31, 39}, - { 59, 60, 38, 33, 31, 31, 40}, - { 54, 64, 38, 33, 32, 30, 40}, - { 49, 66, 39, 33, 32, 29, 41}, - { 47, 64, 42, 32, 33, 29, 42}, - { 51, 46, 35, 31, 33, 37, 54}, - { 61, 51, 36, 32, 33, 38, 36}, - { 63, 53, 37, 32, 32, 34, 37}, - { 62, 55, 37, 33, 32, 32, 39}, - { 58, 59, 37, 33, 32, 31, 40}, - { 53, 63, 38, 33, 32, 31, 40}, - { 49, 64, 40, 33, 33, 30, 41}, - { 46, 62, 42, 33, 33, 30, 42} - }, - { - { 39, 34, 33, 58, 44, 31, 32}, - { 60, 38, 32, 40, 51, 30, 31}, - { 73, 49, 31, 39, 48, 32, 31}, - { 60, 73, 30, 39, 46, 33, 32}, - { 43, 87, 35, 38, 45, 33, 32}, - { 35, 78, 54, 36, 45, 33, 32}, - { 33, 47, 86, 35, 44, 33, 32}, - { 31, 17, 114, 34, 44, 34, 33}, - { 43, 37, 32, 53, 70, 30, 31}, - { 53, 50, 30, 42, 72, 31, 30}, - { 52, 66, 30, 39, 70, 32, 30}, - { 46, 78, 35, 37, 68, 34, 30}, - { 43, 75, 48, 37, 66, 34, 30}, - { 40, 62, 68, 35, 65, 35, 30}, - { 33, 37, 97, 33, 62, 37, 31}, - { 26, 14, 122, 32, 59, 38, 33}, - { 40, 39, 33, 34, 87, 37, 30}, - { 45, 54, 32, 34, 84, 41, 29}, - { 41, 70, 35, 33, 83, 40, 29}, - { 37, 73, 44, 32, 82, 40, 30}, - { 37, 65, 
60, 31, 81, 41, 29}, - { 35, 48, 82, 30, 79, 43, 29}, - { 28, 27, 108, 28, 76, 45, 30}, - { 19, 11, 127, 27, 70, 46, 32}, - { 38, 40, 34, 27, 73, 62, 28}, - { 39, 54, 35, 30, 73, 62, 28}, - { 33, 65, 41, 29, 75, 59, 28}, - { 30, 65, 53, 27, 76, 58, 29}, - { 29, 53, 72, 26, 77, 58, 29}, - { 27, 35, 95, 24, 77, 60, 28}, - { 19, 19, 117, 23, 74, 61, 30}, - { 9, 16, 127, 23, 68, 60, 34}, - { 35, 40, 35, 29, 44, 89, 30}, - { 33, 51, 39, 29, 49, 86, 30}, - { 28, 57, 49, 28, 53, 83, 30}, - { 24, 52, 65, 26, 56, 82, 30}, - { 22, 39, 86, 24, 58, 82, 30}, - { 18, 22, 108, 23, 59, 82, 31}, - { 10, 13, 125, 22, 58, 80, 33}, - { 0, 19, 127, 22, 56, 74, 40}, - { 33, 40, 36, 31, 28, 90, 45}, - { 29, 46, 44, 29, 31, 92, 43}, - { 24, 45, 58, 28, 34, 91, 43}, - { 19, 37, 78, 26, 37, 91, 43}, - { 15, 22, 99, 25, 38, 91, 42}, - { 11, 11, 118, 24, 39, 90, 44}, - { 2, 11, 127, 23, 41, 85, 48}, - { 0, 17, 127, 23, 43, 75, 55}, - { 31, 37, 39, 30, 28, 54, 82}, - { 27, 37, 52, 28, 30, 58, 79}, - { 22, 30, 70, 27, 32, 58, 79}, - { 15, 19, 91, 26, 33, 58, 79}, - { 10, 8, 111, 25, 34, 58, 79}, - { 5, 2, 125, 25, 35, 57, 80}, - { 0, 9, 127, 25, 36, 53, 84}, - { 0, 13, 127, 25, 39, 47, 88}, - { 28, 29, 46, 28, 39, 2, 123}, - { 24, 24, 62, 27, 41, 1, 125}, - { 19, 14, 81, 25, 43, 0, 126}, - { 13, 4, 101, 24, 44, 0, 127}, - { 6, 0, 116, 23, 45, 0, 127}, - { 0, 0, 126, 23, 45, 1, 127}, - { 0, 4, 127, 25, 44, 2, 127}, - { 0, 9, 127, 25, 44, 3, 127} - }, - { - { 30, 32, 32, 42, 34, 32, 32}, - { 63, 26, 34, 16, 38, 32, 32}, - { 98, 26, 34, 25, 34, 33, 32}, - { 75, 61, 30, 31, 32, 33, 32}, - { 36, 94, 32, 30, 33, 32, 32}, - { 26, 76, 58, 30, 33, 32, 32}, - { 30, 39, 91, 31, 32, 33, 31}, - { 32, 23, 105, 32, 32, 32, 32}, - { 34, 30, 33, 31, 52, 29, 32}, - { 66, 24, 34, 11, 41, 33, 32}, - { 97, 28, 34, 24, 34, 33, 32}, - { 71, 65, 30, 30, 32, 33, 32}, - { 34, 92, 35, 30, 33, 32, 32}, - { 26, 70, 64, 29, 34, 32, 32}, - { 30, 37, 94, 30, 33, 32, 31}, - { 32, 23, 105, 31, 33, 33, 31}, - { 37, 29, 33, 8, 79, 27, 32}, - { 71, 22, 35, 5, 50, 32, 32}, - { 98, 29, 34, 23, 34, 34, 32}, - { 66, 70, 30, 31, 31, 33, 32}, - { 31, 92, 38, 30, 33, 32, 32}, - { 26, 66, 68, 29, 34, 32, 31}, - { 30, 34, 97, 30, 34, 33, 31}, - { 31, 22, 106, 30, 34, 33, 31}, - { 40, 28, 34, 0, 76, 46, 28}, - { 76, 21, 35, 0, 55, 35, 32}, - { 97, 32, 34, 21, 37, 33, 33}, - { 61, 75, 29, 30, 32, 32, 32}, - { 29, 92, 40, 29, 33, 32, 32}, - { 26, 62, 73, 29, 34, 32, 31}, - { 29, 32, 99, 30, 34, 33, 30}, - { 31, 22, 107, 30, 34, 33, 31}, - { 42, 27, 34, 1, 48, 79, 25}, - { 80, 20, 35, 0, 48, 47, 31}, - { 94, 36, 32, 17, 40, 33, 33}, - { 55, 80, 29, 27, 35, 31, 32}, - { 27, 90, 43, 28, 34, 32, 31}, - { 26, 58, 76, 29, 33, 33, 30}, - { 29, 30, 101, 29, 34, 34, 30}, - { 31, 21, 108, 29, 35, 34, 30}, - { 44, 26, 34, 6, 30, 80, 40}, - { 81, 21, 35, 0, 41, 52, 35}, - { 90, 41, 31, 14, 41, 35, 33}, - { 51, 82, 29, 24, 37, 32, 32}, - { 27, 87, 47, 27, 35, 32, 31}, - { 26, 54, 79, 29, 34, 33, 30}, - { 29, 29, 102, 28, 34, 33, 30}, - { 31, 21, 108, 28, 35, 33, 31}, - { 47, 26, 34, 7, 34, 44, 75}, - { 80, 24, 34, 0, 41, 41, 50}, - { 84, 45, 31, 12, 40, 36, 36}, - { 49, 81, 31, 22, 37, 33, 32}, - { 28, 81, 51, 26, 35, 33, 31}, - { 28, 51, 81, 28, 34, 33, 30}, - { 29, 30, 101, 28, 35, 33, 31}, - { 31, 22, 107, 28, 35, 33, 32}, - { 48, 27, 34, 10, 40, 16, 97}, - { 75, 27, 34, 3, 42, 26, 66}, - { 77, 47, 33, 12, 40, 32, 43}, - { 49, 75, 36, 21, 37, 33, 35}, - { 32, 72, 55, 25, 36, 33, 32}, - { 30, 49, 81, 27, 35, 33, 31}, - { 30, 32, 98, 28, 35, 32, 32}, - { 31, 24, 104, 28, 35, 32, 
33} - }, - { - { 36, 29, 33, 43, 47, 29, 31}, - { 74, 20, 35, 19, 47, 34, 32}, - { 92, 35, 32, 29, 31, 40, 34}, - { 53, 80, 26, 33, 28, 36, 37}, - { 24, 91, 41, 31, 31, 31, 38}, - { 25, 57, 74, 31, 32, 30, 37}, - { 32, 28, 99, 32, 32, 29, 36}, - { 34, 20, 105, 33, 32, 30, 35}, - { 50, 26, 34, 33, 74, 30, 31}, - { 75, 28, 33, 23, 46, 47, 33}, - { 64, 58, 29, 30, 26, 46, 40}, - { 31, 85, 37, 31, 27, 33, 44}, - { 22, 67, 64, 30, 31, 28, 42}, - { 29, 35, 93, 31, 32, 27, 40}, - { 33, 20, 105, 32, 33, 27, 37}, - { 34, 19, 106, 33, 32, 29, 36}, - { 51, 29, 33, 25, 72, 51, 30}, - { 61, 42, 31, 30, 31, 60, 39}, - { 40, 70, 34, 32, 24, 41, 50}, - { 22, 72, 54, 30, 31, 27, 50}, - { 25, 44, 83, 30, 33, 25, 44}, - { 32, 23, 102, 32, 33, 26, 40}, - { 34, 18, 107, 32, 33, 28, 37}, - { 34, 19, 105, 33, 32, 30, 35}, - { 45, 35, 32, 30, 39, 79, 33}, - { 43, 53, 33, 35, 24, 53, 55}, - { 27, 67, 45, 32, 29, 27, 61}, - { 22, 53, 72, 30, 33, 22, 52}, - { 28, 31, 95, 31, 33, 25, 43}, - { 32, 20, 105, 32, 33, 27, 38}, - { 34, 18, 107, 32, 32, 29, 36}, - { 34, 20, 105, 33, 31, 31, 35}, - { 38, 40, 32, 35, 23, 72, 54}, - { 31, 55, 39, 34, 29, 32, 73}, - { 22, 57, 60, 31, 35, 18, 64}, - { 25, 39, 86, 31, 35, 22, 49}, - { 30, 24, 101, 32, 33, 27, 40}, - { 33, 19, 106, 32, 32, 30, 36}, - { 34, 18, 107, 33, 31, 31, 35}, - { 34, 20, 104, 33, 31, 32, 34}, - { 33, 42, 35, 34, 28, 39, 82}, - { 26, 51, 50, 33, 34, 18, 80}, - { 23, 46, 74, 31, 35, 20, 59}, - { 27, 32, 93, 32, 34, 26, 44}, - { 31, 22, 103, 32, 32, 30, 37}, - { 33, 19, 106, 33, 31, 31, 35}, - { 34, 19, 106, 33, 31, 32, 34}, - { 35, 21, 103, 34, 31, 32, 34}, - { 29, 41, 41, 33, 34, 20, 92}, - { 24, 44, 62, 34, 35, 18, 73}, - { 24, 37, 83, 34, 33, 25, 52}, - { 28, 28, 97, 33, 32, 30, 40}, - { 32, 23, 103, 33, 31, 32, 36}, - { 34, 20, 105, 34, 30, 33, 34}, - { 35, 20, 104, 34, 30, 33, 33}, - { 35, 22, 102, 34, 30, 33, 34}, - { 27, 38, 51, 34, 34, 20, 86}, - { 26, 37, 71, 35, 34, 24, 64}, - { 27, 33, 87, 35, 32, 30, 47}, - { 30, 28, 96, 34, 31, 32, 39}, - { 32, 24, 100, 35, 30, 32, 36}, - { 34, 23, 101, 34, 30, 33, 34}, - { 35, 23, 101, 34, 30, 32, 34}, - { 34, 24, 99, 35, 30, 33, 34} - }, - { - { 39, 30, 31, 67, 33, 34, 31}, - { 72, 21, 32, 43, 39, 33, 31}, - { 100, 23, 32, 35, 39, 34, 31}, - { 75, 63, 24, 32, 38, 34, 32}, - { 32, 98, 26, 29, 37, 35, 32}, - { 22, 77, 55, 29, 36, 35, 31}, - { 31, 37, 90, 31, 35, 35, 32}, - { 35, 22, 100, 33, 33, 36, 33}, - { 47, 29, 32, 74, 54, 32, 31}, - { 71, 24, 32, 60, 50, 36, 30}, - { 86, 31, 30, 46, 48, 37, 30}, - { 65, 63, 25, 34, 46, 39, 30}, - { 33, 85, 32, 28, 43, 40, 30}, - { 26, 64, 60, 27, 39, 41, 30}, - { 33, 33, 87, 29, 35, 41, 31}, - { 37, 23, 93, 32, 33, 41, 32}, - { 41, 32, 32, 45, 84, 32, 32}, - { 55, 31, 32, 50, 70, 40, 30}, - { 62, 37, 31, 45, 61, 45, 29}, - { 53, 55, 31, 36, 55, 48, 29}, - { 38, 63, 40, 29, 48, 50, 28}, - { 34, 49, 60, 27, 43, 51, 29}, - { 38, 30, 78, 28, 38, 50, 31}, - { 40, 24, 83, 30, 36, 48, 33}, - { 35, 33, 33, 29, 75, 58, 29}, - { 39, 35, 33, 34, 68, 59, 29}, - { 41, 39, 34, 36, 61, 62, 29}, - { 41, 43, 37, 33, 54, 64, 28}, - { 41, 43, 45, 30, 48, 65, 29}, - { 42, 36, 56, 27, 44, 63, 30}, - { 42, 30, 65, 27, 41, 60, 33}, - { 42, 28, 68, 28, 37, 56, 36}, - { 33, 34, 33, 31, 42, 88, 30}, - { 31, 36, 34, 31, 44, 84, 31}, - { 31, 37, 35, 32, 43, 83, 31}, - { 35, 35, 39, 32, 40, 82, 31}, - { 40, 32, 44, 31, 38, 81, 31}, - { 44, 30, 48, 30, 37, 78, 33}, - { 44, 30, 52, 28, 37, 72, 36}, - { 43, 30, 55, 29, 35, 66, 40}, - { 32, 33, 33, 34, 25, 85, 48}, - { 30, 34, 34, 33, 25, 88, 44}, - { 30, 
34, 36, 34, 25, 90, 41}, - { 33, 32, 38, 34, 25, 90, 40}, - { 38, 29, 41, 34, 26, 88, 40}, - { 42, 29, 41, 33, 27, 85, 41}, - { 43, 30, 42, 31, 28, 80, 43}, - { 42, 31, 45, 31, 30, 72, 47}, - { 32, 33, 33, 33, 26, 54, 79}, - { 31, 32, 34, 35, 20, 68, 68}, - { 32, 32, 35, 36, 17, 76, 62}, - { 34, 31, 36, 36, 17, 79, 59}, - { 37, 29, 37, 36, 18, 78, 58}, - { 39, 29, 37, 35, 20, 77, 58}, - { 41, 30, 37, 34, 22, 74, 58}, - { 40, 31, 40, 32, 26, 68, 59}, - { 33, 31, 34, 33, 29, 31, 98}, - { 34, 30, 34, 35, 23, 45, 88}, - { 34, 31, 34, 36, 20, 54, 82}, - { 35, 31, 34, 36, 18, 59, 78}, - { 36, 31, 34, 37, 19, 60, 76}, - { 38, 30, 34, 36, 20, 61, 74}, - { 39, 31, 35, 35, 22, 60, 73}, - { 39, 31, 37, 34, 24, 59, 71} - }, - { - { 30, 33, 32, 55, 32, 32, 32}, - { 47, 30, 31, 29, 36, 32, 32}, - { 81, 28, 32, 28, 34, 32, 32}, - { 85, 46, 29, 32, 32, 33, 32}, - { 54, 82, 26, 32, 32, 33, 32}, - { 30, 90, 38, 31, 32, 33, 32}, - { 30, 56, 73, 31, 33, 32, 32}, - { 37, 21, 102, 32, 32, 32, 32}, - { 33, 32, 31, 68, 39, 31, 31}, - { 38, 32, 31, 43, 34, 33, 31}, - { 63, 30, 31, 29, 34, 32, 32}, - { 82, 37, 30, 29, 33, 32, 32}, - { 71, 63, 27, 31, 32, 33, 32}, - { 44, 86, 30, 30, 33, 33, 32}, - { 33, 72, 55, 30, 32, 32, 31}, - { 37, 37, 86, 31, 32, 33, 31}, - { 34, 33, 32, 60, 61, 29, 32}, - { 36, 33, 31, 56, 38, 32, 31}, - { 51, 30, 31, 38, 33, 33, 32}, - { 75, 31, 31, 30, 33, 33, 32}, - { 80, 47, 29, 30, 32, 33, 31}, - { 60, 73, 27, 30, 33, 33, 31}, - { 41, 78, 41, 30, 33, 32, 31}, - { 38, 53, 68, 30, 32, 33, 31}, - { 33, 33, 32, 43, 77, 35, 30}, - { 35, 33, 31, 55, 54, 29, 32}, - { 43, 32, 31, 46, 39, 31, 32}, - { 64, 30, 31, 35, 34, 33, 32}, - { 79, 37, 30, 31, 32, 33, 31}, - { 73, 57, 28, 30, 32, 33, 31}, - { 54, 73, 33, 30, 32, 33, 31}, - { 43, 64, 52, 30, 32, 33, 31}, - { 33, 33, 32, 34, 68, 58, 28}, - { 34, 33, 31, 45, 70, 33, 31}, - { 38, 33, 31, 48, 52, 29, 32}, - { 54, 31, 31, 40, 39, 31, 32}, - { 73, 32, 31, 34, 34, 33, 31}, - { 77, 45, 29, 31, 32, 32, 32}, - { 65, 63, 30, 31, 31, 33, 31}, - { 51, 66, 42, 30, 32, 33, 31}, - { 33, 32, 32, 34, 44, 81, 31}, - { 34, 33, 31, 38, 66, 52, 28}, - { 36, 33, 30, 44, 62, 34, 31}, - { 47, 31, 31, 43, 48, 30, 32}, - { 64, 31, 31, 38, 38, 32, 32}, - { 75, 38, 30, 33, 34, 32, 32}, - { 71, 53, 30, 31, 32, 33, 32}, - { 59, 61, 37, 30, 32, 33, 32}, - { 33, 32, 31, 35, 31, 71, 54}, - { 34, 33, 31, 37, 49, 70, 33}, - { 36, 33, 31, 41, 60, 48, 30}, - { 43, 32, 31, 43, 54, 35, 31}, - { 56, 31, 31, 40, 44, 32, 32}, - { 68, 35, 30, 36, 37, 32, 32}, - { 70, 45, 30, 33, 34, 33, 32}, - { 63, 55, 35, 31, 33, 33, 32}, - { 33, 32, 31, 33, 34, 36, 87}, - { 34, 32, 31, 36, 38, 62, 52}, - { 36, 33, 31, 39, 50, 57, 36}, - { 41, 33, 31, 41, 53, 43, 33}, - { 50, 33, 31, 41, 48, 36, 32}, - { 59, 35, 31, 37, 41, 34, 32}, - { 65, 42, 31, 35, 36, 33, 32}, - { 62, 49, 35, 33, 34, 34, 33} - } -}; diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c index 036eed98..cec0aa22 100644 --- a/src/strategies/avx2/dct-avx2.c +++ b/src/strategies/avx2/dct-avx2.c @@ -3200,7 +3200,7 @@ static void fast_forward_tr_4x32_avx2(const int16_t* src, int16_t* dst, tr_type_ ver_coeff = ff_dct8_4x32_coeff_ver; } - int16_t v_hor_pass_out[4*32]; + ALIGNED(32) int16_t v_hor_pass_out[4*32]; fast_forward_tr_4xN_avx2_hor(src, (__m256i*)v_hor_pass_out, hor_coeff, shift_1st, height, 0, skip_width); @@ -5636,7 +5636,7 @@ static void fast_forward_tr_16x32_avx2(const int16_t* src, int16_t* dst, tr_type ver_coeff = ff_dct8_16x32_coeff_ver; } - int16_t v_hor_pass_out[32*16]; + ALIGNED(32) int16_t 
v_hor_pass_out[32*16]; fast_forward_DCT2_B16_avx2_hor(src, (__m256i*)v_hor_pass_out, hor_coeff, shift_1st, height, 0, skip_width); @@ -5948,7 +5948,7 @@ static void fast_forward_DCT2_B32_avx2_hor(const int16_t* src, __m256i* dst, con v_trunc_0 = _mm256_packs_epi32(v_trunc_0, v_trunc_1); v_trunc_1 = _mm256_packs_epi32(v_trunc_2, v_trunc_3); - if(line == 32) { + if(line == 32 || line == 1) { v_trunc_0 = _mm256_permute4x64_epi64(v_trunc_0, _MM_SHUFFLE(3, 1, 2, 0)); v_trunc_1 = _mm256_permute4x64_epi64(v_trunc_1, _MM_SHUFFLE(3, 1, 2, 0)); } diff --git a/src/strategies/avx2/depquant-avx2.c b/src/strategies/avx2/depquant-avx2.c index 915eb3b2..0646f1da 100644 --- a/src/strategies/avx2/depquant-avx2.c +++ b/src/strategies/avx2/depquant-avx2.c @@ -647,7 +647,7 @@ static void update_state_eos_avx2(context_store* ctxs, const uint32_t scan_pos, for (int i = 0; i < numSbb * 4; i += 32) { __m256i sbb_flags = _mm256_loadu_si256((__m256i*)(&cc->m_allSbbCtx[cc->m_prev_sbb_ctx_offset].sbbFlags[i])); sbb_flags = _mm256_shuffle_epi8(sbb_flags, inc_ref_state); - _mm256_store_si256((__m256i*)&sbbFlags[i], sbb_flags); + _mm256_storeu_si256((__m256i*)&sbbFlags[i], sbb_flags); } } // The first 16 variables will be loaded from the previous state so this can be started from 16 diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c index 838bad91..24acca9e 100644 --- a/src/strategies/avx2/intra-avx2.c +++ b/src/strategies/avx2/intra-avx2.c @@ -32,1042 +32,6631 @@ #include "strategies/avx2/intra-avx2.h" + #if COMPILE_INTEL_AVX2 && defined X86_64 #include "uvg266.h" +#include "cu.h" +#include "tables.h" #if UVG_BIT_DEPTH == 8 #include +#include #include +#include +#include -#include "strategyselector.h" -#include "strategies/missing-intel-intrinsics.h" +#include "global.h" +#include "intra-avx2.h" +#include "intra_avx2_tables.h" +#include "strategies/avx2/mip_data_avx2.h" +#include "uvg_math.h" - /** - * \brief Generate angular predictions. - * \param cu_loc CU locationand size data. - * \param intra_mode Angular mode in range 2..34. - * \param channel_type Color channel. - * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. - * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. - * \param dst Buffer of size width*width. - * \param multi_ref_idx Reference line index for use with MRL. - */ -static void uvg_angular_pred_avx2( - const cu_loc_t* const cu_loc, - const int_fast8_t intra_mode, - const int_fast8_t channel_type, - const uvg_pixel *const in_ref_above, - const uvg_pixel *const in_ref_left, - uvg_pixel *const dst, - const uint8_t multi_ref_idx, - const uint8_t isp_mode, - const int cu_dim) -{ - // ISP_TODO: non-square block implementation, height is passed but not used - const int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; - const int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height; - const int log2_width = uvg_g_convert_to_log2[width]; - const int log2_height = uvg_g_convert_to_log2[height]; + #include "strategyselector.h" + #include "strategies/missing-intel-intrinsics.h" - assert((log2_width >= 2 && log2_width <= 5) && (log2_height >= 2 && log2_height <= 5)); - assert(intra_mode >= 2 && intra_mode <= 66); - // TODO: implement handling of MRL - uint8_t multi_ref_index = channel_type == COLOR_Y ? 
multi_ref_idx : 0; - uint8_t isp = isp_mode; +ALIGNED(32) static const int16_t cubic_filter[32][4] = +{ + { 0, 64, 0, 0 }, + { -1, 63, 2, 0 }, + { -2, 62, 4, 0 }, + { -2, 60, 7, -1 }, + { -2, 58, 10, -2 }, + { -3, 57, 12, -2 }, + { -4, 56, 14, -2 }, + { -4, 55, 15, -2 }, + { -4, 54, 16, -2 }, + { -5, 53, 18, -2 }, + { -6, 52, 20, -2 }, + { -6, 49, 24, -3 }, + { -6, 46, 28, -4 }, + { -5, 44, 29, -4 }, + { -4, 42, 30, -4 }, + { -4, 39, 33, -4 }, + { -4, 36, 36, -4 }, + { -4, 33, 39, -4 }, + { -4, 30, 42, -4 }, + { -4, 29, 44, -5 }, + { -4, 28, 46, -6 }, + { -3, 24, 49, -6 }, + { -2, 20, 52, -6 }, + { -2, 18, 53, -5 }, + { -2, 16, 54, -4 }, + { -2, 15, 55, -4 }, + { -2, 14, 56, -4 }, + { -2, 12, 57, -3 }, + { -2, 10, 58, -2 }, + { -1, 7, 60, -2 }, + { 0, 4, 62, -2 }, + { 0, 2, 63, -1 }, +}; + + +// Specified in JVET-T2001 8.4.5.2.13 Table 25 +// These are the fC interpolation filter coefficients +ALIGNED(32) static const int8_t cubic_filter_8bit_c[32][4] = +{ + { 0, 64, 0, 0 }, + { -1, 63, 2, 0 }, + { -2, 62, 4, 0 }, + { -2, 60, 7, -1 }, + { -2, 58, 10, -2 }, + { -3, 57, 12, -2 }, + { -4, 56, 14, -2 }, + { -4, 55, 15, -2 }, + { -4, 54, 16, -2 }, + { -5, 53, 18, -2 }, + { -6, 52, 20, -2 }, + { -6, 49, 24, -3 }, + { -6, 46, 28, -4 }, + { -5, 44, 29, -4 }, + { -4, 42, 30, -4 }, + { -4, 39, 33, -4 }, + { -4, 36, 36, -4 }, + { -4, 33, 39, -4 }, + { -4, 30, 42, -4 }, + { -4, 29, 44, -5 }, + { -4, 28, 46, -6 }, + { -3, 24, 49, -6 }, + { -2, 20, 52, -6 }, + { -2, 18, 53, -5 }, + { -2, 16, 54, -4 }, + { -2, 15, 55, -4 }, + { -2, 14, 56, -4 }, + { -2, 12, 57, -3 }, + { -2, 10, 58, -2 }, + { -1, 7, 60, -2 }, + { 0, 4, 62, -2 }, + { 0, 2, 63, -1 }, +}; + +// Specified in JVET-T2001 8.4.5.2.13 Table 25 +// These are the fG interpolation filter coefficients +ALIGNED(32) static const int8_t cubic_filter_8bit_g[32][4] = +{ + {16, 32, 16, 0}, + {16, 32, 16, 0}, + {15, 31, 17, 1}, + {15, 31, 17, 1}, + {14, 30, 18, 2}, + {14, 30, 18, 2}, + {13, 29, 19, 3}, + {13, 29, 19, 3}, + {12, 28, 20, 4}, + {12, 28, 20, 4}, + {11, 27, 21, 5}, + {11, 27, 21, 5}, + {10, 26, 22, 6}, + {10, 26, 22, 6}, + { 9, 25, 23, 7}, + { 9, 25, 23, 7}, + { 8, 24, 24, 8}, + { 8, 24, 24, 8}, + { 7, 23, 25, 9}, + { 7, 23, 25, 9}, + { 6, 22, 26, 10}, + { 6, 22, 26, 10}, + { 5, 21, 27, 11}, + { 5, 21, 27, 11}, + { 4, 20, 28, 12}, + { 4, 20, 28, 12}, + { 3, 19, 29, 13}, + { 3, 19, 29, 13}, + { 2, 18, 30, 14}, + { 2, 18, 30, 14}, + { 1, 17, 31, 15}, + { 1, 17, 31, 15} +}; + + +static void angular_pred_w4_ver_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t(*filter)[4]) +{ + const int width = 4; - __m256i p_shuf_01 = _mm256_setr_epi8( + const __m256i p_shuf_01 = _mm256_setr_epi8( 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c ); - __m256i p_shuf_23 = _mm256_setr_epi8( + const __m256i p_shuf_23 = _mm256_setr_epi8( 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e ); - __m256i w_shuf_01 = _mm256_setr_epi8( - 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, - 0x08, 0x0a, 0x08, 0x0a, 0x08, 0x0a, 0x08, 0x0a, - 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, - 0x08, 0x0a, 0x08, 0x0a, 0x08, 0x0a, 0x08, 0x0a + const __m256i w_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 
0x00, 0x01, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x0c, 0x0d, 0x0c, 0x0d, 0x0c, 0x0d, 0x0c, 0x0d ); - __m256i w_shuf_23 = _mm256_setr_epi8( - 0x04, 0x06, 0x04, 0x06, 0x04, 0x06, 0x04, 0x06, - 0x0c, 0x0e, 0x0c, 0x0e, 0x0c, 0x0e, 0x0c, 0x0e, - 0x04, 0x06, 0x04, 0x06, 0x04, 0x06, 0x04, 0x06, - 0x0c, 0x0e, 0x0c, 0x0e, 0x0c, 0x0e, 0x0c, 0x0e + const __m256i w_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, + 0x0e, 0x0f, 0x0e, 0x0f, 0x0e, 0x0f, 0x0e, 0x0f ); - static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; - static const int16_t modedisp2invsampledisp[32] = { 0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, 512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16 }; // (512 * 32) / sampledisp - static const int32_t pre_scale[] = { 8, 7, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, -1, -1, -2, -3 }; - - static const int16_t cubic_filter[32][4] = - { - { 0, 64, 0, 0 }, - { -1, 63, 2, 0 }, - { -2, 62, 4, 0 }, - { -2, 60, 7, -1 }, - { -2, 58, 10, -2 }, - { -3, 57, 12, -2 }, - { -4, 56, 14, -2 }, - { -4, 55, 15, -2 }, - { -4, 54, 16, -2 }, - { -5, 53, 18, -2 }, - { -6, 52, 20, -2 }, - { -6, 49, 24, -3 }, - { -6, 46, 28, -4 }, - { -5, 44, 29, -4 }, - { -4, 42, 30, -4 }, - { -4, 39, 33, -4 }, - { -4, 36, 36, -4 }, - { -4, 33, 39, -4 }, - { -4, 30, 42, -4 }, - { -4, 29, 44, -5 }, - { -4, 28, 46, -6 }, - { -3, 24, 49, -6 }, - { -2, 20, 52, -6 }, - { -2, 18, 53, -5 }, - { -2, 16, 54, -4 }, - { -2, 15, 55, -4 }, - { -2, 14, 56, -4 }, - { -2, 12, 57, -3 }, - { -2, 10, 58, -2 }, - { -1, 7, 60, -2 }, - { 0, 4, 62, -2 }, - { 0, 2, 63, -1 }, - }; - - // Temporary buffer for modes 11-25. - // It only needs to be big enough to hold indices from -width to width-1. - //uvg_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE:IDX] = { 0 }; - uvg_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 }; - uvg_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 }; - - int32_t pred_mode = intra_mode; // ToDo: handle WAIP + // Do 4-tap intra interpolation filtering + // For a 4 width block, height must be at least 4. Handle 4 lines at once + for (int y = 0; y < height; y += 4) { + // Copy the filter to local memory + __m128i vdfract = _mm_loadu_si128((__m128i*)&delta_fract[y]); + __m128i vidxw = _mm_cvtepi16_epi32(vdfract); + __m128i all_weights = _mm_i32gather_epi32((const int32_t*)filter, vidxw, 4); + + __m256i weights256 = _mm256_insertf128_si256(_mm256_castsi128_si256(all_weights), all_weights, 1); + + // Shuffle the interpolation weights into place. + __m256i w01 = _mm256_shuffle_epi8(weights256, w_shuf_01); + __m256i w23 = _mm256_shuffle_epi8(weights256, w_shuf_23); + + // This solution assumes the delta int values to be 64-bit + // Cast from 16-bit to 64-bit. 
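+ // Each 64-bit lane of the gather below loads the 8 consecutive reference
+ // pixels starting at ref_main + delta_int[y + i] (byte scale 1); the 4-tap
+ // windows of all four output pixels on that row fit inside those 8 bytes,
+ // which the p_shuf_01/p_shuf_23 shuffles then expand into filter-tap pairs.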
+ __m128i vdelta_int = _mm_loadu_si128((__m128i*)&delta_int[y]); + __m256i vidx = _mm256_cvtepi16_epi64(vdelta_int); + + __m256i vp = _mm256_i64gather_epi64((const long long int*)ref_main, vidx, 1); + __m256i vp_01 = _mm256_shuffle_epi8(vp, p_shuf_01); + __m256i vp_23 = _mm256_shuffle_epi8(vp, p_shuf_23); + + __m256i dot_01 = _mm256_maddubs_epi16(vp_01, w01); + __m256i dot_23 = _mm256_maddubs_epi16(vp_23, w23); + __m256i sum = _mm256_add_epi16(dot_01, dot_23); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); + + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); + + _mm_store_si128((__m128i*)dst, packed); + dst += 16; + } +} - // Whether to swap references to always project on the left reference row. - const bool vertical_mode = intra_mode >= 34; - // Modes distance to horizontal or vertical mode. - const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -(pred_mode - 18); - //const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; - - // Sample displacement per column in fractions of 32. - const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; - - // TODO: replace latter width with height - int scale = MIN(2, log2_width - pre_scale[abs(mode_disp)]); +static void angular_pred_w8_h2_ver_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t(*filter)[4]) +{ + //const int width = 8; - // Pointer for the reference we are interpolating from. - uvg_pixel *ref_main; - // Pointer for the other reference. - const uvg_pixel *ref_side; + const __m128i p_shuf_01 = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08 + ); - // Set ref_main and ref_side such that, when indexed with 0, they point to - // index 0 in block coordinates. - if (sample_disp < 0) { - memcpy(&temp_main[width], vertical_mode ? in_ref_above : in_ref_left, sizeof(uvg_pixel) * (width + 1 + multi_ref_index + 1)); - memcpy(&temp_side[width], vertical_mode ? in_ref_left : in_ref_above, sizeof(uvg_pixel) * (width + 1 + multi_ref_index + 1)); + const __m128i p_shuf_23 = _mm_setr_epi8( + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, + 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a + ); - ref_main = temp_main + width; - ref_side = temp_side + width; + const __m256i w_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05 + ); - for (int i = -width; i <= -1; i++) { - ref_main[i] = ref_side[MIN((-i * modedisp2invsampledisp[abs(mode_disp)] + 256) >> 9, width)]; - } + const __m256i w_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07 + ); + // Do 4-tap intra interpolation filtering + // For a 8 width block, height must be at least 2. 
Handle 2 lines at once + for (int y = 0; y < height; y += 2) { + // Load and shuffle filter weights + __m128i vidxw = _mm_loadu_si128((__m128i*)&delta_fract[y]); + __m128i vidxw32 = _mm_cvtepi16_epi32(vidxw); + __m128i all_weights = _mm_i32gather_epi32((const int32_t*)filter, vidxw32, 4); + __m256i aw256 = _mm256_inserti128_si256(_mm256_castsi128_si256(all_weights), all_weights, 1); - //const uint32_t index_offset = width + 1; - //const int32_t last_index = width; - //const int_fast32_t most_negative_index = (width * sample_disp) >> 5; - //// Negative sample_disp means, we need to use both references. - - //// TODO: update refs to take into account variating block size and shapes - //// (height is not always equal to width) - //ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; - //ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; - - //// Move the reference pixels to start from the middle to the later half of - //// the tmp_ref, so there is room for negative indices. - //for (int_fast32_t x = -1; x < width; ++x) { - // tmp_ref[x + index_offset] = ref_main[x]; - //} - //// Get a pointer to block index 0 in tmp_ref. - //ref_main = &tmp_ref[index_offset]; - //tmp_ref[index_offset -1] = tmp_ref[index_offset]; - - //// Extend the side reference to the negative indices of main reference. - //int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" - //int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; - //// TODO: add 'vertical_mode ? height : width' instead of 'width' - // - //for (int_fast32_t x = -1; x > most_negative_index; x--) { - // col_sample_disp += inv_abs_sample_disp; - // int_fast32_t side_index = col_sample_disp >> 8; - // tmp_ref[x + index_offset - 1] = ref_side[side_index - 1]; - //} - //tmp_ref[last_index + index_offset] = tmp_ref[last_index + index_offset - 1]; - //tmp_ref[most_negative_index + index_offset - 1] = tmp_ref[most_negative_index + index_offset]; - } - else { + __m256i w01 = _mm256_shuffle_epi8(aw256, w_shuf_01); + __m256i w23 = _mm256_shuffle_epi8(aw256, w_shuf_23); - memcpy(temp_main, vertical_mode ? in_ref_above : in_ref_left, sizeof(uvg_pixel)* (width * 2 + multi_ref_index + 1)); - memcpy(temp_side, vertical_mode ? in_ref_left : in_ref_above, sizeof(uvg_pixel)* (width * 2 + multi_ref_index + 1)); + // Load and shuffle reference pixels + __m128i vp0 = _mm_loadu_si128((__m128i*)(ref_main + delta_int[y + 0])); + __m128i vp1 = _mm_loadu_si128((__m128i*)(ref_main + delta_int[y + 1])); - const int s = 0; - const int max_index = (multi_ref_index << s) + 2; - const int ref_length = width << 1; - const uvg_pixel val = temp_main[ref_length + multi_ref_index]; - memset(temp_main + ref_length + multi_ref_index, val, max_index + 1); + __m256i vp_01 = _mm256_castsi128_si256(_mm_shuffle_epi8(vp0, p_shuf_01)); + vp_01 = _mm256_inserti128_si256(vp_01, _mm_shuffle_epi8(vp1, p_shuf_01), 1); + + __m256i vp_23 = _mm256_castsi128_si256(_mm_shuffle_epi8(vp0, p_shuf_23)); + vp_23 = _mm256_inserti128_si256(vp_23, _mm_shuffle_epi8(vp1, p_shuf_23), 1); + + __m256i vmadd01 = _mm256_maddubs_epi16(vp_01, w01); + __m256i vmadd23 = _mm256_maddubs_epi16(vp_23, w23); + __m256i sum = _mm256_add_epi16(vmadd01, vmadd23); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); - ref_main = temp_main; - ref_side = temp_side; - //// sample_disp >= 0 means we don't need to refer to negative indices, - //// which means we can just use the references as is. - //ref_main = (vertical_mode ? 
in_ref_above : in_ref_left) + 1; - //ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); - //memcpy(tmp_ref + width, ref_main, (width*2) * sizeof(uvg_pixel)); - //ref_main = &tmp_ref[width]; - //tmp_ref[width-1] = tmp_ref[width]; - //int8_t last_index = 1 + width*2; - //tmp_ref[width + last_index] = tmp_ref[width + last_index - 1]; + _mm_store_si128((__m128i*)dst, packed); + dst += 16; } +} - // compensate for line offset in reference line buffers - ref_main += multi_ref_index; - ref_side += multi_ref_index; +static void angular_pred_w8_ver_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t(*filter)[4]) +{ + //const int width = 8; - static const int uvg_intra_hor_ver_dist_thres[8] = { 24, 24, 24, 14, 2, 0, 0, 0 }; - int filter_threshold = uvg_intra_hor_ver_dist_thres[log2_width]; - int dist_from_vert_or_hor = MIN(abs((int32_t)pred_mode - 50), abs((int32_t)pred_mode - 18)); + const __m128i p_shuf_01 = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08 + ); - bool use_cubic = true; // Default to cubic filter - if (dist_from_vert_or_hor > filter_threshold) { - if ((abs(sample_disp) & 0x1F) != 0) - { - use_cubic = false; - } - } - // Cubic must be used if ref line != 0 - if (multi_ref_index) { - use_cubic = true; - } + const __m128i p_shuf_23 = _mm_setr_epi8( + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, + 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a + ); - if (sample_disp != 0) { - // The mode is not horizontal or vertical, we have to do interpolation. + const __m256i w_shuf_01_row01 = _mm256_setr_epi8( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05 + ); - int_fast32_t delta_pos = sample_disp * multi_ref_index; - int64_t delta_int[4] = { 0 }; - int16_t delta_fract[4] = { 0 }; - for (int_fast32_t y = 0; y + 3 < width; y += 4) { + const __m256i w_shuf_23_row01 = _mm256_setr_epi8( + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07 + ); - for (int yy = 0; yy < 4; ++yy) { - delta_pos += sample_disp; - delta_int[yy] = delta_pos >> 5; - delta_fract[yy] = delta_pos & (32 - 1); - } + const __m256i w_shuf_01_row23 = _mm256_setr_epi8( + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x0c, 0x0d, 0x0c, 0x0d, 0x0c, 0x0d, 0x0c, 0x0d, + 0x0c, 0x0d, 0x0c, 0x0d, 0x0c, 0x0d, 0x0c, 0x0d + ); - if ((abs(sample_disp) & 0x1F) != 0) { - - // Luma Channel - if (channel_type == 0) { - - int16_t f[4][4] = { { 0 } }; - if (use_cubic) { - memcpy(f[0], cubic_filter[delta_fract[0]], 8); - memcpy(f[1], cubic_filter[delta_fract[1]], 8); - memcpy(f[2], cubic_filter[delta_fract[2]], 8); - memcpy(f[3], cubic_filter[delta_fract[3]], 8); - } - else { - for(int yy = 0; yy < 4; ++yy) { - const int16_t offset = (delta_fract[yy] >> 1); - f[yy][0] = 16 - offset; - f[yy][1] = 32 - offset; - f[yy][2] = 16 + offset; - f[yy][3] = offset; - } - } + const __m256i w_shuf_23_row23 = _mm256_setr_epi8( + 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, + 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, + 
0x0e, 0x0f, 0x0e, 0x0f, 0x0e, 0x0f, 0x0e, 0x0f, + 0x0e, 0x0f, 0x0e, 0x0f, 0x0e, 0x0f, 0x0e, 0x0f + ); - // Do 4-tap intra interpolation filtering - uvg_pixel *p = (uvg_pixel*)ref_main; - __m256i vidx = _mm256_loadu_si256((__m256i *)delta_int); - __m256i all_weights = _mm256_loadu_si256((__m256i *)f); - __m256i w01 = _mm256_shuffle_epi8(all_weights, w_shuf_01); - __m256i w23 = _mm256_shuffle_epi8(all_weights, w_shuf_23); + // Do 4-tap intra interpolation filtering + // For a 8 width block, height must be at least 2. This version handles 4 lines at once to minimize vidx loads. + // No need to check height 2 cases, other function handles that. + for (int y = 0; y < height; y += 4) { + + // Load and shuffle filter weights + __m128i vidxw = _mm_loadu_si128((__m128i*) &delta_fract[y]); + __m128i vidxw32 = _mm_cvtepi16_epi32(vidxw); + __m128i all_weights = _mm_i32gather_epi32((const int32_t*)filter, vidxw32, 4); + __m256i aw256 = _mm256_inserti128_si256(_mm256_castsi128_si256(all_weights), all_weights, 1); + + __m256i w01_row01 = _mm256_shuffle_epi8(aw256, w_shuf_01_row01); + __m256i w23_row01 = _mm256_shuffle_epi8(aw256, w_shuf_23_row01); + __m256i w01_row23 = _mm256_shuffle_epi8(aw256, w_shuf_01_row23); + __m256i w23_row23 = _mm256_shuffle_epi8(aw256, w_shuf_23_row23); + + // Load and shuffle reference pixels + __m128i vp0 = _mm_loadu_si128((__m128i*)(ref_main + delta_int[y + 0])); + __m128i vp1 = _mm_loadu_si128((__m128i*)(ref_main + delta_int[y + 1])); + __m128i vp2 = _mm_loadu_si128((__m128i*)(ref_main + delta_int[y + 2])); + __m128i vp3 = _mm_loadu_si128((__m128i*)(ref_main + delta_int[y + 3])); + + __m256i vp_01_row01 = _mm256_castsi128_si256(_mm_shuffle_epi8(vp0, p_shuf_01)); + vp_01_row01 = _mm256_inserti128_si256(vp_01_row01, _mm_shuffle_epi8(vp1, p_shuf_01), 1); + + __m256i vp_23_row01 = _mm256_castsi128_si256(_mm_shuffle_epi8(vp0, p_shuf_23)); + vp_23_row01 = _mm256_inserti128_si256(vp_23_row01, _mm_shuffle_epi8(vp1, p_shuf_23), 1); + + __m256i vp_01_row23 = _mm256_castsi128_si256(_mm_shuffle_epi8(vp2, p_shuf_01)); + vp_01_row23 = _mm256_inserti128_si256(vp_01_row23, _mm_shuffle_epi8(vp3, p_shuf_01), 1); + + __m256i vp_23_row23 = _mm256_castsi128_si256(_mm_shuffle_epi8(vp2, p_shuf_23)); + vp_23_row23 = _mm256_inserti128_si256(vp_23_row23, _mm_shuffle_epi8(vp3, p_shuf_23), 1); + + __m256i vmadd01_row01 = _mm256_maddubs_epi16(vp_01_row01, w01_row01); + __m256i vmadd23_row01 = _mm256_maddubs_epi16(vp_23_row01, w23_row01); + __m256i vmadd01_row23 = _mm256_maddubs_epi16(vp_01_row23, w01_row23); + __m256i vmadd23_row23 = _mm256_maddubs_epi16(vp_23_row23, w23_row23); + + + __m256i sum01 = _mm256_add_epi16(vmadd01_row01, vmadd23_row01); + __m256i sum23 = _mm256_add_epi16(vmadd01_row23, vmadd23_row23); + sum01 = _mm256_add_epi16(sum01, _mm256_set1_epi16(32)); + sum23 = _mm256_add_epi16(sum23, _mm256_set1_epi16(32)); + sum01 = _mm256_srai_epi16(sum01, 6); + sum23 = _mm256_srai_epi16(sum23, 6); + + __m128i lo01 = _mm256_castsi256_si128(sum01); + __m128i hi01 = _mm256_extracti128_si256(sum01, 1); + __m128i lo23 = _mm256_castsi256_si128(sum23); + __m128i hi23 = _mm256_extracti128_si256(sum23, 1); + + __m128i packed01 = _mm_packus_epi16(lo01, hi01); + __m128i packed23 = _mm_packus_epi16(lo23, hi23); + //__m256i packed = _mm256_inserti128_si256(_mm256_castsi128_si256(packed01), packed23, 1); + + //_mm256_store_si256((__m256i*)dst, packed); + _mm_store_si128((__m128i*)(dst + 0), packed01); + _mm_store_si128((__m128i*)(dst + 16), packed23); + dst += 32; + } +} - for (int_fast32_t x = 0; x + 3 < width; 
x += 4, p += 4) { +NO_ASAN +static void angular_pred_w16_ver_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int width, const int height, const int8_t(*filter)[4]) +{ + const __m256i p_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08 + ); - __m256i vp = _mm256_i64gather_epi64((const long long int*)p, vidx, 1); - __m256i vp_01 = _mm256_shuffle_epi8(vp, p_shuf_01); - __m256i vp_23 = _mm256_shuffle_epi8(vp, p_shuf_23); - - __m256i dot_01 = _mm256_maddubs_epi16(vp_01, w01); - __m256i dot_23 = _mm256_maddubs_epi16(vp_23, w23); - __m256i sum = _mm256_add_epi16(dot_01, dot_23); - sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); - sum = _mm256_srai_epi16(sum, 6); - - __m128i lo = _mm256_castsi256_si128(sum); - __m128i hi = _mm256_extracti128_si256(sum, 1); - __m128i filtered = _mm_packus_epi16(lo, hi); - - *(uint32_t*)(dst + (y + 0) * width + x) = _mm_extract_epi32(filtered, 0); - *(uint32_t*)(dst + (y + 1) * width + x) = _mm_extract_epi32(filtered, 1); - *(uint32_t*)(dst + (y + 2) * width + x) = _mm_extract_epi32(filtered, 2); - *(uint32_t*)(dst + (y + 3) * width + x) = _mm_extract_epi32(filtered, 3); - } - } - else { - - // Do linear filtering - for (int yy = 0; yy < 4; ++yy) { - for (int_fast32_t x = 0; x < width; ++x) { - uvg_pixel ref1 = ref_main[x + delta_int[yy] + 1]; - uvg_pixel ref2 = ref_main[x + delta_int[yy] + 2]; - dst[(y + yy) * width + x] = ref1 + ((delta_fract[yy] * (ref2 - ref1) + 16) >> 5); - } - } - } - } - else { - // Just copy the integer samples - for (int yy = 0; yy < 4; ++yy) { - uvg_pixel *dst_row = dst + (y + yy) * width; - uvg_pixel *ref_row = ref_main + delta_int[yy] + 1; - for (int_fast32_t x = 0; x + 3 < width; x += 4) { - memcpy(dst_row + x, ref_row + x, 4 * sizeof(dst[0])); - } - } - } + const __m256i p_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, + 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, + 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a + ); - - // PDPC - bool PDPC_filter = ((width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) || channel_type != 0); - if (pred_mode > 1 && pred_mode < 67) { - if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL. - PDPC_filter = false; - } - else if (mode_disp > 0) { - PDPC_filter &= (scale >= 0); - } - } - if(PDPC_filter) { - - int16_t wL[4]; - int16_t left[4][4]; + const __m256i w_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01 + ); - int limit = MIN(3 << scale, width); + const __m256i w_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03 + ); - for (int x = 0; x < limit; x += 4) { + // Do 4-tap intra interpolation filtering + // For a 16 width block, height can be 1. 
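+  // Each output sample is the standard 4-tap convolution
+  //   dst[x] = clip8((f[0]*r[x] + f[1]*r[x+1] + f[2]*r[x+2] + f[3]*r[x+3] + 32) >> 6)
+  // with r = ref_main + delta_int[y] and f = filter[delta_fract[y]]. The two
+  // maddubs below compute the f0/f1 and f2/f3 pair sums for 16 pixels at a
+  // time, and the final packus saturates the result back to the 8-bit range.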
+ for (int y = 0; y < height; ++y) { - for (int xx = 0; xx < 4; ++xx) { - int inv_angle_sum = 256 + (x + xx + 1) * modedisp2invsampledisp[abs(mode_disp)]; - wL[xx] = 32 >> (2 * (x + xx) >> scale); + // Load and shuffle filter weights + // This load can read beyond the end of the filter table, however the values + // are not used in the shuffle operation. + __m128i vweights = _mm_loadu_si128((__m128i*)&filter[delta_fract[y]]); + __m256i vw256 = _mm256_inserti128_si256(_mm256_castsi128_si256(vweights), vweights, 1); - for (int yy = 0; yy < 4; ++yy) { - left[yy][xx] = ref_side[(y + yy) + (inv_angle_sum >> 9) + 1]; - } - } + __m256i w01 = _mm256_shuffle_epi8(vw256, w_shuf_01); + __m256i w23 = _mm256_shuffle_epi8(vw256, w_shuf_23); - __m128i vseq = _mm_setr_epi32(0, 1, 2, 3); - __m128i vidx = _mm_slli_epi32(vseq, log2_width); - __m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width + x), vidx, 1); - __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); - __m256i vleft = _mm256_loadu_si256((__m256i*)left); - uint64_t quad; - memcpy(&quad, wL, sizeof(quad)); - __m256i vwL = _mm256_set1_epi64x(quad); - __m256i accu = _mm256_sub_epi16(vleft, vdst16); - accu = _mm256_mullo_epi16(vwL, accu); - accu = _mm256_add_epi16(accu, _mm256_set1_epi16(32)); - accu = _mm256_srai_epi16(accu, 6); - accu = _mm256_add_epi16(vdst16, accu); - - __m128i lo = _mm256_castsi256_si128(accu); - __m128i hi = _mm256_extracti128_si256(accu, 1); - __m128i filtered = _mm_packus_epi16(lo, hi); - - // Need to mask remainder samples on the last iteration when limit % 4 != 0 - int rem_bits = 8 * (limit - x); - __m128i ones = _mm_set1_epi32(0xFF); - __m128i vmask = _mm_slli_epi32(ones, rem_bits); - - // 0 selects filtered, 1 vdst (unchanged) - vdst = _mm_blendv_epi8(filtered, vdst, vmask); - - *(uint32_t*)(dst + (y + 0) * width + x) = _mm_extract_epi32(vdst, 0); - *(uint32_t*)(dst + (y + 1) * width + x) = _mm_extract_epi32(vdst, 1); - *(uint32_t*)(dst + (y + 2) * width + x) = _mm_extract_epi32(vdst, 2); - *(uint32_t*)(dst + (y + 3) * width + x) = _mm_extract_epi32(vdst, 3); - } - } + for (int x = 0; x < width; x += 16) { + __m256i vp = _mm256_loadu_si256((__m256i*)(ref_main + x + delta_int[y])); - /* - if (pred_mode == 2 || pred_mode == 66) { - int wT = 16 >> MIN(31, ((y << 1) >> scale)); - for (int x = 0; x < width; x++) { - int wL = 16 >> MIN(31, ((x << 1) >> scale)); - if (wT + wL == 0) break; - int c = x + y + 1; - if (c >= 2 * width) { wL = 0; } - if (c >= 2 * width) { wT = 0; } - const uvg_pixel left = (wL != 0) ? ref_side[c] : 0; - const uvg_pixel top = (wT != 0) ? ref_main[c] : 0; - dst[y * width + x] = CLIP_TO_PIXEL((wL * left + wT * top + (64 - wL - wT) * dst[y * width + x] + 32) >> 6); - } - } else if (sample_disp == 0 || sample_disp >= 12) { - int inv_angle_sum_0 = 2; - for (int x = 0; x < width; x++) { - inv_angle_sum_0 += modedisp2invsampledisp[abs(mode_disp)]; - int delta_pos_0 = inv_angle_sum_0 >> 2; - int delta_frac_0 = delta_pos_0 & 63; - int delta_int_0 = delta_pos_0 >> 6; - int delta_y = y + delta_int_0 + 1; - // TODO: convert to JVET_K0500_WAIP - if (delta_y > width + width - 1) break; - - int wL = 32 >> MIN(31, ((x << 1) >> scale)); - if (wL == 0) break; - const uvg_pixel *p = ref_side + delta_y - 1; - uvg_pixel left = p[delta_frac_0 >> 5]; - dst[y * width + x] = CLIP_TO_PIXEL((wL * left + (64 - wL) * dst[y * width + x] + 32) >> 6); - } - }*/ - } - } - else { - // Mode is horizontal or vertical, just copy the pixels. 
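+      // Byte shuffles cannot cross 128-bit lanes, so duplicate the middle
+      // quadwords: after the permute, lane 0 holds ref bytes 0..15 (for
+      // outputs x+0..x+7) and lane 1 holds ref bytes 8..23 (for x+8..x+15).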
+ __m256i tmp = _mm256_permute4x64_epi64(vp, _MM_SHUFFLE(2, 1, 1, 0)); - // TODO: update outer loop to use height instead of width - for (int_fast32_t y = 0; y < width; ++y) { - for (int_fast32_t x = 0; x < width; ++x) { - dst[y * width + x] = ref_main[x + 1]; - } - if ((width >= 4 || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) { - int scale = (log2_width + log2_width - 2) >> 2; - const uvg_pixel top_left = ref_main[0]; - const uvg_pixel left = ref_side[1 + y]; - for (int i = 0; i < MIN(3 << scale, width); i++) { - const int wL = 32 >> (2 * i >> scale); - const uvg_pixel val = dst[y * width + i]; - dst[y * width + i] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6)); - } - } - } - } + __m256i vp_01 = _mm256_shuffle_epi8(tmp, p_shuf_01); + __m256i vp_23 = _mm256_shuffle_epi8(tmp, p_shuf_23); - // Flip the block if this is was a horizontal mode. - if (!vertical_mode) { - - const __m128i vtranspose_mask =_mm_setr_epi8( - 0, 4, 8, 12, - 1, 5, 9, 13, - 2, 6, 10, 14, - 3, 7, 11, 15 - ); - - const __m128i vseq = _mm_setr_epi32(0, 1, 2, 3); - const __m128i vidx = _mm_slli_epi32(vseq, log2_width); - - // Transpose as 4x4 subblocks - for (int_fast32_t y = 0; y + 3 < width; y += 4) { - for (int_fast32_t x = y; x + 3 < width; x += 4) { - - __m128i vtemp4x4 = _mm_i32gather_epi32((const int32_t*)(dst + x * width + y), vidx, 1); - __m128i v4x4 = _mm_i32gather_epi32((const int32_t*)(dst + y * width + x), vidx, 1); - vtemp4x4 = _mm_shuffle_epi8(vtemp4x4, vtranspose_mask); - v4x4 = _mm_shuffle_epi8(v4x4, vtranspose_mask); - - *(uint32_t*)(dst + (y + 0) * width + x) = _mm_extract_epi32(vtemp4x4, 0); - *(uint32_t*)(dst + (y + 1) * width + x) = _mm_extract_epi32(vtemp4x4, 1); - *(uint32_t*)(dst + (y + 2) * width + x) = _mm_extract_epi32(vtemp4x4, 2); - *(uint32_t*)(dst + (y + 3) * width + x) = _mm_extract_epi32(vtemp4x4, 3); - - *(uint32_t*)(dst + (x + 0) * width + y) = _mm_extract_epi32(v4x4, 0); - *(uint32_t*)(dst + (x + 1) * width + y) = _mm_extract_epi32(v4x4, 1); - *(uint32_t*)(dst + (x + 2) * width + y) = _mm_extract_epi32(v4x4, 2); - *(uint32_t*)(dst + (x + 3) * width + y) = _mm_extract_epi32(v4x4, 3); - } + __m256i dot_01 = _mm256_maddubs_epi16(vp_01, w01); + __m256i dot_23 = _mm256_maddubs_epi16(vp_23, w23); + __m256i sum = _mm256_add_epi16(dot_01, dot_23); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); + + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + _mm_store_si128((__m128i*)dst, filtered); + dst += 16; } } } -/** - * \brief Generate planar prediction. - * \param cu_loc CU location and size data. - * \param color Color channel. - * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. - * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. - * \param dst Buffer of size width*width. - */ -static void uvg_intra_pred_planar_avx2( - const cu_loc_t* const cu_loc, - color_t color, - const uint8_t *const ref_top, - const uint8_t *const ref_left, - uint8_t *const dst) + +static void angular_pred_w4_hor_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t(*filter)[4]) { - // ISP_TODO: non-square block implementation, height is passed but not used - const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; - const int height = color == COLOR_Y ? 
cu_loc->height : cu_loc->chroma_height; - const int log2_width = uvg_g_convert_to_log2[width]; - const int log2_height = uvg_g_convert_to_log2[height]; + const int width = 4; - assert((log2_width >= 2 && log2_width <= 5) && (log2_height >= 2 && log2_height <= 5)); + const __m256i p_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x08, 0x09, 0x01, 0x02, 0x09, 0x0a, + 0x02, 0x03, 0x0a, 0x0b, 0x03, 0x04, 0x0b, 0x0c, + 0x00, 0x01, 0x08, 0x09, 0x01, 0x02, 0x09, 0x0a, + 0x02, 0x03, 0x0a, 0x0b, 0x03, 0x04, 0x0b, 0x0c + ); + + const __m256i p_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x0a, 0x0b, 0x03, 0x04, 0x0b, 0x0c, + 0x04, 0x05, 0x0c, 0x0d, 0x05, 0x06, 0x0d, 0x0e, + 0x02, 0x03, 0x0a, 0x0b, 0x03, 0x04, 0x0b, 0x0c, + 0x04, 0x05, 0x0c, 0x0d, 0x05, 0x06, 0x0d, 0x0e + ); + + const __m256i w_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d + ); - const uint8_t top_right = ref_top[width + 1]; - const uint8_t bottom_left = ref_left[width + 1]; + const __m256i w_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f + ); - if (log2_width > 2) { + // Copy the filter to local memory + __m128i vdfract = _mm_load_si128((__m128i*)delta_fract); + __m128i vidx = _mm_cvtepi16_epi32(vdfract); + __m128i all_weights = _mm_i32gather_epi32((const int32_t*)filter, vidx, 4); + + __m256i weights256 = _mm256_insertf128_si256(_mm256_castsi128_si256(all_weights), all_weights, 1); + // Shuffle the interpolation weights into place. + __m256i w01 = _mm256_shuffle_epi8(weights256, w_shuf_01); + __m256i w23 = _mm256_shuffle_epi8(weights256, w_shuf_23); + + // For a 4 width block, height must be at least 4. Handle 4 lines at once + for (int y = 0; y < height; y += 4) { + // This solution assumes the delta int values to be 64-bit + // Cast from 16-bit to 64-bit. 
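+    // Gather eight consecutive reference bytes for each of the four column
+    // offsets. The delta_int values are per-column and constant over the
+    // rows, so only the base pointer &ref_main[y] advances between iterations.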
+ __m128i vidx = _mm_loadu_si128((__m128i*)delta_int); + __m256i vidx256 = _mm256_cvtepu16_epi64(vidx); - __m128i v_width = _mm_set1_epi16(width); - __m128i v_top_right = _mm_set1_epi16(top_right); - __m128i v_bottom_left = _mm_set1_epi16(bottom_left); + __m256i vp = _mm256_i64gather_epi64((const long long int*)&ref_main[y], vidx256, 1); + + __m256i vp_01 = _mm256_shuffle_epi8(vp, p_shuf_01); + __m256i vp_23 = _mm256_shuffle_epi8(vp, p_shuf_23); + + vp_01 = _mm256_permute4x64_epi64(vp_01, _MM_SHUFFLE(3, 1, 2, 0)); + vp_23 = _mm256_permute4x64_epi64(vp_23, _MM_SHUFFLE(3, 1, 2, 0)); + vp_01 = _mm256_shuffle_epi32(vp_01, _MM_SHUFFLE(3, 1, 2, 0)); + vp_23 = _mm256_shuffle_epi32(vp_23, _MM_SHUFFLE(3, 1, 2, 0)); - for (int y = 0; y < width; ++y) { + __m256i vmadd01 = _mm256_maddubs_epi16(vp_01, w01); + __m256i vmadd23 = _mm256_maddubs_epi16(vp_23, w23); + __m256i sum = _mm256_add_epi16(vmadd01, vmadd23); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); - __m128i x_plus_1 = _mm_setr_epi16(-7, -6, -5, -4, -3, -2, -1, 0); - __m128i v_ref_left = _mm_set1_epi16(ref_left[y + 1]); - __m128i y_plus_1 = _mm_set1_epi16(y + 1); + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); - for (int x = 0; x < width; x += 8) { - x_plus_1 = _mm_add_epi16(x_plus_1, _mm_set1_epi16(8)); - __m128i v_ref_top = _mm_loadl_epi64((__m128i*)&(ref_top[x + 1])); - v_ref_top = _mm_cvtepu8_epi16(v_ref_top); + _mm_store_si128((__m128i*)dst, packed); + dst += 16; + } +} - __m128i hor = _mm_add_epi16(_mm_mullo_epi16(_mm_sub_epi16(v_width, x_plus_1), v_ref_left), _mm_mullo_epi16(x_plus_1, v_top_right)); - __m128i ver = _mm_add_epi16(_mm_mullo_epi16(_mm_sub_epi16(v_width, y_plus_1), v_ref_top), _mm_mullo_epi16(y_plus_1, v_bottom_left)); +static void angular_pred_w4_hor_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t pred_mode, const int16_t multi_ref_line, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t (*filter)[4]) +{ + // const int width = 4; - //dst[y * width + x] = ho + const __m256i w_shuf_01 = _mm256_setr_epi8( + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d + ); - __m128i chunk = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(ver, hor), v_width), (log2_width + 1)); - chunk = _mm_packus_epi16(chunk, chunk); - _mm_storel_epi64((__m128i*)&(dst[y * width + x]), chunk); - } - } - } else { - // Only if log2_width == 2 <=> width == 4 - assert(width == 4); - const __m128i rl_shufmask = _mm_setr_epi32(0x04040404, 0x05050505, - 0x06060606, 0x07070707); + const __m256i w_shuf_23 = _mm256_setr_epi8( + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f + ); - const __m128i xp1 = _mm_set1_epi32 (0x04030201); - const __m128i yp1 = _mm_shuffle_epi8(xp1, rl_shufmask); + const int mode_idx = pred_mode <= 34 ? pred_mode + 12 : 80 - pred_mode; // Considers also wide angle modes. 
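+  // The shuffle vector table stores three multi-ref-line variants of 64 bytes
+  // for each mode, hence the 192-byte mode stride and 64-byte MRL step below.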
+ const int table_offset = mode_idx * 192 + multi_ref_line * 64; - const __m128i rdist = _mm_set1_epi32 (0x00010203); - const __m128i bdist = _mm_shuffle_epi8(rdist, rl_shufmask); + const __m256i vpshuf0 = _mm256_load_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w4_hor[table_offset + 0]); + const __m256i vpshuf1 = _mm256_load_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w4_hor[table_offset + 32]); + + int ref_offset = MIN(delta_int[0], delta_int[3]); + + // Copy the filter to local memory + __m128i vdfract = _mm_loadu_si128((__m128i*)delta_fract); + __m128i vidx = _mm_cvtepi16_epi32(vdfract); + __m128i all_weights = _mm_i32gather_epi32((const int32_t*)filter, vidx, 4); + + __m256i weights256 = _mm256_insertf128_si256(_mm256_castsi128_si256(all_weights), all_weights, 1); + + // Shuffle the interpolation weights into place. + __m256i w01 = _mm256_shuffle_epi8(weights256, w_shuf_01); + __m256i w23 = _mm256_shuffle_epi8(weights256, w_shuf_23); + + // 4-tap interpolation filtering. + // For a 4 width block, height must be at least 4. Handle 4 lines at once + for (int y = 0; y < height; y += 4) { + // Load 16 samples and shuffle into place + __m128i vref = _mm_loadu_si128((__m128i*)&ref_main[y + ref_offset]); + __m256i vp = _mm256_insertf128_si256(_mm256_castsi128_si256(vref), vref, 1); + + __m256i vp_01 = _mm256_shuffle_epi8(vp, vpshuf0); + __m256i vp_23 = _mm256_shuffle_epi8(vp, vpshuf1); + + __m256i vmadd01 = _mm256_maddubs_epi16(vp_01, w01); + __m256i vmadd23 = _mm256_maddubs_epi16(vp_23, w23); + __m256i sum = _mm256_add_epi16(vmadd01, vmadd23); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); + + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); + + _mm_store_si128((__m128i*)dst, packed); + dst += 16; + } +} + +static void angular_pred_w8_hor_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t(*filter)[4]) +{ + const int width = 8; + + __m128i tmp = _mm_loadu_si128((__m128i*)delta_int); + __m256i vidx = _mm256_cvtepi16_epi32(tmp); + // Load weights + tmp = _mm_load_si128((__m128i*)delta_fract); + __m256i vidxw = _mm256_cvtepi16_epi32(tmp); + __m256i vweights = _mm256_i32gather_epi32((const int32_t*)filter, vidxw, 4); + + for (int y = 0; y < height; y += 2) { + + // Do 4-tap intra interpolation filtering + uvg_pixel* p = (uvg_pixel*)(ref_main + y); + __m256i vp0 = _mm256_i32gather_epi32((const int*)&ref_main[y + 0], vidx, 1); + __m256i vp1 = _mm256_i32gather_epi32((const int*)&ref_main[y + 1], vidx, 1); + + __m256i vmadd0 = _mm256_maddubs_epi16(vp0, vweights); + __m256i vmadd1 = _mm256_maddubs_epi16(vp1, vweights); + __m256i sum = _mm256_hadd_epi16(vmadd0, vmadd1); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); + + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); + packed = _mm_shuffle_epi32(packed, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm_store_si128((__m128i*)dst, packed); + + dst += 16; + } +} - const __m128i wid16 = _mm_set1_epi16 (width); - const __m128i tr = _mm_set1_epi8 (top_right); - const __m128i bl = _mm_set1_epi8 (bottom_left); +static void angular_pred_w8_hor_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t pred_mode, const int16_t multi_ref_line, const int16_t* delta_int, const int16_t* delta_fract, 
const int height, const int8_t (*filter)[4]) +{ + // const int width = 8; + + __m256i vwshuf01 = _mm256_setr_epi8( + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d + ); - uint32_t rt14 = *(const uint32_t *)(ref_top + 1); - uint32_t rl14 = *(const uint32_t *)(ref_left + 1); - uint64_t rt14_64 = (uint64_t)rt14; - uint64_t rl14_64 = (uint64_t)rl14; - uint64_t rtl14 = rt14_64 | (rl14_64 << 32); + __m256i vwshuf23 = _mm256_setr_epi8( + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f + ); - __m128i rtl_v = _mm_cvtsi64_si128 (rtl14); - __m128i rt = _mm_broadcastd_epi32(rtl_v); - __m128i rl = _mm_shuffle_epi8 (rtl_v, rl_shufmask); + int ref_offset = MIN(delta_int[0], delta_int[7]); + const __m256i v32s = _mm256_set1_epi16(32); - __m128i rtrl_l = _mm_unpacklo_epi8 (rt, rl); - __m128i rtrl_h = _mm_unpackhi_epi8 (rt, rl); + // Load weights + __m128i tmp = _mm_loadu_si128((__m128i*)delta_fract); + __m256i vidxw = _mm256_cvtepi16_epi32(tmp); + __m256i vweights = _mm256_i32gather_epi32((const int32_t*)filter, vidxw, 4); + + __m256i vw01 = _mm256_shuffle_epi8(vweights, vwshuf01); + __m256i vw23 = _mm256_shuffle_epi8(vweights, vwshuf23); - __m128i bdrd_l = _mm_unpacklo_epi8 (bdist, rdist); - __m128i bdrd_h = _mm_unpackhi_epi8 (bdist, rdist); + vw01 = _mm256_permute4x64_epi64(vw01, _MM_SHUFFLE(3, 1, 2, 0)); + vw23 = _mm256_permute4x64_epi64(vw23, _MM_SHUFFLE(3, 1, 2, 0)); - __m128i hvs_lo = _mm_maddubs_epi16 (rtrl_l, bdrd_l); - __m128i hvs_hi = _mm_maddubs_epi16 (rtrl_h, bdrd_h); + const int mode_idx = pred_mode <= 34 ? pred_mode + 12 : 80 - pred_mode; // Considers also wide angle modes. + const int table_offset = mode_idx * 192 + multi_ref_line * 64; - __m128i xp1yp1_l = _mm_unpacklo_epi8 (xp1, yp1); - __m128i xp1yp1_h = _mm_unpackhi_epi8 (xp1, yp1); - __m128i trbl_lh = _mm_unpacklo_epi8 (tr, bl); + const __m256i vpshuf01 = _mm256_loadu_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w8_hor[table_offset + 0]); + const __m256i vpshuf23 = _mm256_loadu_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w8_hor[table_offset + 32]); - __m128i addend_l = _mm_maddubs_epi16 (trbl_lh, xp1yp1_l); - __m128i addend_h = _mm_maddubs_epi16 (trbl_lh, xp1yp1_h); + // 4-tap interpolation filtering. + // For a 8 width block, height must be at least 2. Handle 2 lines at once. 
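+  // One 16-byte reference load anchored at the smallest per-column offset
+  // covers every 4-tap window of both rows; the table-driven shuffles place
+  // the taps for each column and vw01/vw23 supply the matching weights.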
+ for (int y = 0; y < height; y += 2) { + // Load samples and shuffle into place + __m128i vp = _mm_loadu_si128((__m128i*)&ref_main[y + ref_offset]); + __m256i vp256 = _mm256_inserti128_si256(_mm256_castsi128_si256(vp), vp, 1); - addend_l = _mm_add_epi16 (addend_l, wid16); - addend_h = _mm_add_epi16 (addend_h, wid16); + __m256i vp01 = _mm256_shuffle_epi8(vp256, vpshuf01); + __m256i vp23 = _mm256_shuffle_epi8(vp256, vpshuf23); + + __m256i vmadd01 = _mm256_maddubs_epi16(vp01, vw01); + __m256i vmadd23 = _mm256_maddubs_epi16(vp23, vw23); + __m256i sum = _mm256_add_epi16(vmadd01, vmadd23); + sum = _mm256_add_epi16(sum, v32s); + sum = _mm256_srai_epi16(sum, 6); - __m128i sum_l = _mm_add_epi16 (hvs_lo, addend_l); - __m128i sum_h = _mm_add_epi16 (hvs_hi, addend_h); + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); - // Shift right by log2_width + 1 - __m128i sum_l_t = _mm_srli_epi16 (sum_l, 3); - __m128i sum_h_t = _mm_srli_epi16 (sum_h, 3); - __m128i result = _mm_packus_epi16 (sum_l_t, sum_h_t); - _mm_storeu_si128((__m128i *)dst, result); - } -} - -// Calculate the DC value for a 4x4 block. The algorithm uses slightly -// different addends, multipliers etc for different pixels in the block, -// but for a fixed-size implementation one vector wide, all the weights, -// addends etc can be preinitialized for each position. -static void pred_filtered_dc_4x4(const uint8_t *ref_top, - const uint8_t *ref_left, - uint8_t *out_block, - const uint8_t multi_ref_idx) -{ - const uint32_t rt_u32 = *(const uint32_t *)(ref_top + 1); - const uint32_t rl_u32 = *(const uint32_t *)(ref_left + 1); - - const __m128i zero = _mm_setzero_si128(); - const __m128i twos = _mm_set1_epi8(2); - - // Hack. Move 4 u8's to bit positions 0, 64, 128 and 192 in two regs, to - // expand them to 16 bits sort of "for free". Set highest bits on all the - // other bytes in vectors to zero those bits in the result vector. - const __m128i rl_shuf_lo = _mm_setr_epi32(0x80808000, 0x80808080, - 0x80808001, 0x80808080); - const __m128i rl_shuf_hi = _mm_add_epi8 (rl_shuf_lo, twos); - - // Every second multiplier is 1, because we want maddubs to calculate - // a + bc = 1 * a + bc (actually 2 + bc). We need to fill a vector with - // ((u8)2)'s for other stuff anyway, so that can also be used here. 
- const __m128i mult_lo = _mm_setr_epi32(0x01030102, 0x01030103, - 0x01040103, 0x01040104); - const __m128i mult_hi = _mm_setr_epi32(0x01040103, 0x01040104, - 0x01040103, 0x01040104); - __m128i four = _mm_cvtsi32_si128 (4); - __m128i rt = _mm_cvtsi32_si128 (rt_u32); - __m128i rl = _mm_cvtsi32_si128 (rl_u32); - __m128i rtrl = _mm_unpacklo_epi32 (rt, rl); - - __m128i sad0 = _mm_sad_epu8 (rtrl, zero); - __m128i sad1 = _mm_shuffle_epi32 (sad0, _MM_SHUFFLE(1, 0, 3, 2)); - __m128i sad2 = _mm_add_epi64 (sad0, sad1); - __m128i sad3 = _mm_add_epi64 (sad2, four); - - __m128i dc_64 = _mm_srli_epi64 (sad3, 3); - __m128i dc_8 = _mm_broadcastb_epi8(dc_64); - - __m128i rl_lo = _mm_shuffle_epi8 (rl, rl_shuf_lo); - __m128i rl_hi = _mm_shuffle_epi8 (rl, rl_shuf_hi); - - __m128i rt_lo = _mm_unpacklo_epi8 (rt, zero); - __m128i rt_hi = zero; - - __m128i dc_addend = _mm_unpacklo_epi8(dc_8, twos); - - __m128i dc_multd_lo = _mm_maddubs_epi16(dc_addend, mult_lo); - __m128i dc_multd_hi = _mm_maddubs_epi16(dc_addend, mult_hi); - - __m128i rl_rt_lo = _mm_add_epi16 (rl_lo, rt_lo); - __m128i rl_rt_hi = _mm_add_epi16 (rl_hi, rt_hi); - - __m128i res_lo = _mm_add_epi16 (dc_multd_lo, rl_rt_lo); - __m128i res_hi = _mm_add_epi16 (dc_multd_hi, rl_rt_hi); - - res_lo = _mm_srli_epi16 (res_lo, 2); - res_hi = _mm_srli_epi16 (res_hi, 2); - - __m128i final = _mm_packus_epi16 (res_lo, res_hi); - _mm_storeu_si128((__m128i *)out_block, final); -} - -static void pred_filtered_dc_8x8(const uint8_t *ref_top, - const uint8_t *ref_left, - uint8_t *out_block, - const uint8_t multi_ref_idx) -{ - const uint64_t rt_u64 = *(const uint64_t *)(ref_top + 1); - const uint64_t rl_u64 = *(const uint64_t *)(ref_left + 1); - - const __m128i zero128 = _mm_setzero_si128(); - const __m256i twos = _mm256_set1_epi8(2); - - // DC multiplier is 2 at (0, 0), 3 at (*, 0) and (0, *), and 4 at (*, *). - // There is a constant addend of 2 on each pixel, use values from the twos - // register and multipliers of 1 for that, to use maddubs for an (a*b)+c - // operation. - const __m256i mult_up_lo = _mm256_setr_epi32(0x01030102, 0x01030103, - 0x01030103, 0x01030103, - 0x01040103, 0x01040104, - 0x01040104, 0x01040104); - - // The 6 lowest rows have same multipliers, also the DC values and addends - // are the same so this works for all of those - const __m256i mult_rest = _mm256_permute4x64_epi64(mult_up_lo, _MM_SHUFFLE(3, 2, 3, 2)); - - // Every 8-pixel row starts with the next pixel of ref_left. Along with - // doing the shuffling, also expand u8->u16, ie. move bytes 0 and 1 from - // ref_left to bit positions 0 and 128 in rl_up_lo, 2 and 3 to rl_up_hi, - // etc. The places to be zeroed out are 0x80 instead of the usual 0xff, - // because this allows us to form new masks on the fly by adding 0x02-bytes - // to this mask and still retain the highest bits as 1 where things should - // be zeroed out. - const __m256i rl_shuf_up_lo = _mm256_setr_epi32(0x80808000, 0x80808080, - 0x80808080, 0x80808080, - 0x80808001, 0x80808080, - 0x80808080, 0x80808080); - // And don't waste memory or architectural regs, hope these instructions - // will be placed in between the shuffles by the compiler to only use one - // register for the shufmasks, and executed way ahead of time because their - // regs can be renamed. 
- const __m256i rl_shuf_up_hi = _mm256_add_epi8 (rl_shuf_up_lo, twos); - const __m256i rl_shuf_dn_lo = _mm256_add_epi8 (rl_shuf_up_hi, twos); - const __m256i rl_shuf_dn_hi = _mm256_add_epi8 (rl_shuf_dn_lo, twos); + _mm_store_si128((__m128i*)dst, packed); - __m128i eight = _mm_cvtsi32_si128 (8); - __m128i rt = _mm_cvtsi64_si128 (rt_u64); - __m128i rl = _mm_cvtsi64_si128 (rl_u64); - __m128i rtrl = _mm_unpacklo_epi64 (rt, rl); + dst += 16; + } +} - __m128i sad0 = _mm_sad_epu8 (rtrl, zero128); - __m128i sad1 = _mm_shuffle_epi32 (sad0, _MM_SHUFFLE(1, 0, 3, 2)); - __m128i sad2 = _mm_add_epi64 (sad0, sad1); - __m128i sad3 = _mm_add_epi64 (sad2, eight); +static void angular_pred_w16_hor_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t* delta_int, const int16_t* delta_fract, const int width, const int height, const int8_t(*filter)[4]) +{ + __m256i vw0[4]; + __m256i vw1[4]; + for (int x = 0, i = 0; x < width; x += 16, ++i) { + __m128i tmp0 = _mm_loadu_si128((__m128i*) &delta_fract[x + 0]); + __m128i tmp1 = _mm_loadu_si128((__m128i*) &delta_fract[x + 8]); - __m128i dc_64 = _mm_srli_epi64 (sad3, 4); - __m256i dc_8 = _mm256_broadcastb_epi8(dc_64); + __m256i vidx0 = _mm256_cvtepi16_epi32(tmp0); + __m256i vidx1 = _mm256_cvtepi16_epi32(tmp1); - __m256i dc_addend = _mm256_unpacklo_epi8 (dc_8, twos); + vw0[i] = _mm256_i32gather_epi32((const int32_t*)filter, vidx0, 4); + vw1[i] = _mm256_i32gather_epi32((const int32_t*)filter, vidx1, 4); + } + + for (int x = 0, vi = 0; x < width; x += 16, ++vi) { + __m128i tmp0 = _mm_loadu_si128((__m128i*)&delta_int[x]); + __m128i tmp1 = _mm_loadu_si128((__m128i*)&delta_int[x + 8]); + __m256i vidx0 = _mm256_cvtepi16_epi32(tmp0); + __m256i vidx1 = _mm256_cvtepi16_epi32(tmp1); + + // Width 16, handle one row at a time + for (int y = 0; y < height; ++y) { + // Do 4-tap intra interpolation filtering + __m256i vp0 = _mm256_i32gather_epi32((const int*)&ref_main[y], vidx0, 1); + __m256i vp1 = _mm256_i32gather_epi32((const int*)&ref_main[y], vidx1, 1); + + __m256i vmadd0 = _mm256_maddubs_epi16(vp0, vw0[vi]); + __m256i vmadd1 = _mm256_maddubs_epi16(vp1, vw1[vi]); + __m256i sum = _mm256_hadd_epi16(vmadd0, vmadd1); + sum = _mm256_add_epi16(sum, _mm256_set1_epi16(32)); + sum = _mm256_srai_epi16(sum, 6); + + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); + packed = _mm_shuffle_epi32(packed, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm_store_si128((__m128i*)(dst + (y * width + x)), packed); + } + } +} - __m256i dc_up_lo = _mm256_maddubs_epi16 (dc_addend, mult_up_lo); - __m256i dc_rest = _mm256_maddubs_epi16 (dc_addend, mult_rest); +static void angular_pred_w16_hor_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t pred_mode, const int16_t multi_ref_line, const int16_t* delta_int, const int16_t* delta_fract, const int height, const int8_t(*filter)[4]) +{ + const int width = 16; + const int ref_offset = MIN(delta_int[0], delta_int[15]); + const __m256i v32s = _mm256_set1_epi16(32); + + __m128i tmp0 = _mm_loadu_si128((__m128i*) &delta_fract[0]); + __m128i tmp1 = _mm_loadu_si128((__m128i*) &delta_fract[8]); - // rt_dn is all zeros, as is rt_up_hi. This'll get us the rl and rt parts - // in A|B, C|D order instead of A|C, B|D that could be packed into abcd - // order, so these need to be permuted before adding to the weighed DC - // values. 
- __m256i rt_up_lo = _mm256_cvtepu8_epi16 (rt); + __m256i vidx0 = _mm256_cvtepi16_epi32(tmp0); + __m256i vidx1 = _mm256_cvtepi16_epi32(tmp1); - __m256i rlrlrlrl = _mm256_broadcastq_epi64(rl); - __m256i rl_up_lo = _mm256_shuffle_epi8 (rlrlrlrl, rl_shuf_up_lo); + __m256i vw0 = _mm256_i32gather_epi32((const int32_t*)filter, vidx0, 4); + __m256i vw1 = _mm256_i32gather_epi32((const int32_t*)filter, vidx1, 4); - // Everything ref_top is zero except on the very first row - __m256i rt_rl_up_hi = _mm256_shuffle_epi8 (rlrlrlrl, rl_shuf_up_hi); - __m256i rt_rl_dn_lo = _mm256_shuffle_epi8 (rlrlrlrl, rl_shuf_dn_lo); - __m256i rt_rl_dn_hi = _mm256_shuffle_epi8 (rlrlrlrl, rl_shuf_dn_hi); + // Unused modes are pruned from the table and it starts from mode 5. Offset mode 5 to zero index. + const int mode_idx = pred_mode - 5; + const int table_offset = mode_idx * 768 + multi_ref_line * 256; // mode_idx * (3 * 256) + mrl * 256 - __m256i rt_rl_up_lo = _mm256_add_epi16 (rt_up_lo, rl_up_lo); + const __m256i vpshuf0 = _mm256_loadu_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w64_hor[table_offset + 0]); + const __m256i vpshuf1 = _mm256_loadu_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w64_hor[table_offset + 32]); - __m256i rt_rl_up_lo_2 = _mm256_permute2x128_si256(rt_rl_up_lo, rt_rl_up_hi, 0x20); - __m256i rt_rl_up_hi_2 = _mm256_permute2x128_si256(rt_rl_up_lo, rt_rl_up_hi, 0x31); - __m256i rt_rl_dn_lo_2 = _mm256_permute2x128_si256(rt_rl_dn_lo, rt_rl_dn_hi, 0x20); - __m256i rt_rl_dn_hi_2 = _mm256_permute2x128_si256(rt_rl_dn_lo, rt_rl_dn_hi, 0x31); + // Width 16, handle one row at a time + for (int y = 0; y < height; ++y) { + // Do 4-tap intra interpolation filtering + __m128i vp = _mm_loadu_si128((__m128i*)&ref_main[y + ref_offset]); + __m256i vp256 = _mm256_inserti128_si256(_mm256_castsi128_si256(vp), vp, 1); - __m256i up_lo = _mm256_add_epi16(rt_rl_up_lo_2, dc_up_lo); - __m256i up_hi = _mm256_add_epi16(rt_rl_up_hi_2, dc_rest); - __m256i dn_lo = _mm256_add_epi16(rt_rl_dn_lo_2, dc_rest); - __m256i dn_hi = _mm256_add_epi16(rt_rl_dn_hi_2, dc_rest); + __m256i vp0 = _mm256_shuffle_epi8(vp256, vpshuf0); + __m256i vp1 = _mm256_shuffle_epi8(vp256, vpshuf1); - up_lo = _mm256_srli_epi16(up_lo, 2); - up_hi = _mm256_srli_epi16(up_hi, 2); - dn_lo = _mm256_srli_epi16(dn_lo, 2); - dn_hi = _mm256_srli_epi16(dn_hi, 2); + __m256i vmadd0 = _mm256_maddubs_epi16(vp0, vw0); + __m256i vmadd1 = _mm256_maddubs_epi16(vp1, vw1); + __m256i sum = _mm256_hadd_epi16(vmadd0, vmadd1); + sum = _mm256_add_epi16(sum, v32s); + sum = _mm256_srai_epi16(sum, 6); - __m256i res_up = _mm256_packus_epi16(up_lo, up_hi); - __m256i res_dn = _mm256_packus_epi16(dn_lo, dn_hi); + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); + packed = _mm_shuffle_epi32(packed, _MM_SHUFFLE(3, 1, 2, 0)); - _mm256_storeu_si256(((__m256i *)out_block) + 0, res_up); - _mm256_storeu_si256(((__m256i *)out_block) + 1, res_dn); + _mm_store_si128((__m128i*)dst, packed); + dst += 16; + } } -static INLINE __m256i cvt_u32_si256(const uint32_t u) +// Note: use this same function also for w64. 
w16 could use this, but it was slightly faster without the for loop overheads +static void angular_pred_w32_hor_avx2(uvg_pixel* dst, const uvg_pixel* ref_main, const int16_t pred_mode, const int16_t multi_ref_line, const int16_t* delta_int, const int16_t* delta_fract, const int width, const int height, const int8_t(*filter)[4]) { - const __m256i zero = _mm256_setzero_si256(); - return _mm256_insert_epi32(zero, u, 0); -} + const __m256i v32s = _mm256_set1_epi16(32); -static void pred_filtered_dc_16x16(const uint8_t *ref_top, - const uint8_t *ref_left, - uint8_t *out_block, - const uint8_t multi_ref_idx) -{ - const __m128i rt_128 = _mm_loadu_si128((const __m128i *)(ref_top + 1)); - const __m128i rl_128 = _mm_loadu_si128((const __m128i *)(ref_left + 1)); + // Unused modes are pruned from the table and it starts from mode 5. Offset mode 5 to zero index. + const int mode_idx = pred_mode - 5; + const int table_offset = mode_idx * 768 + multi_ref_line * 256; // mode_idx * (3 * 256) + mrl * 256 + + for (int x = 0, shuf = table_offset; x < width; x += 16, shuf += 64) { + const int ref_offset = MIN(delta_int[x], delta_int[x + 15]); - const __m128i zero_128 = _mm_setzero_si128(); - const __m256i zero = _mm256_setzero_si256(); - const __m256i twos = _mm256_set1_epi8(2); + __m128i tmp0 = _mm_loadu_si128((__m128i*)&delta_fract[x]); + __m128i tmp1 = _mm_loadu_si128((__m128i*)&delta_fract[x + 8]); - const __m256i mult_r0 = _mm256_setr_epi32(0x01030102, 0x01030103, - 0x01030103, 0x01030103, - 0x01030103, 0x01030103, - 0x01030103, 0x01030103); + __m256i vidx0 = _mm256_cvtepi16_epi32(tmp0); + __m256i vidx1 = _mm256_cvtepi16_epi32(tmp1); - const __m256i mult_left = _mm256_set1_epi16(0x0103); + __m256i vw0 = _mm256_i32gather_epi32((const int32_t*)filter, vidx0, 4); + __m256i vw1 = _mm256_i32gather_epi32((const int32_t*)filter, vidx1, 4); - // Leftmost bytes' blend mask, to move bytes (pixels) from the leftmost - // column vector to the result row - const __m256i lm8_bmask = _mm256_setr_epi32(0xff, 0, 0, 0, 0xff, 0, 0, 0); + __m256i vpshuf0 = _mm256_loadu_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w64_hor[shuf + 0]); + __m256i vpshuf1 = _mm256_loadu_si256((__m256i*) &intra_luma_interpolation_shuffle_vectors_w64_hor[shuf + 32]); - __m128i sixteen = _mm_cvtsi32_si128(16); - __m128i sad0_t = _mm_sad_epu8 (rt_128, zero_128); - __m128i sad0_l = _mm_sad_epu8 (rl_128, zero_128); - __m128i sad0 = _mm_add_epi64(sad0_t, sad0_l); + // Width 16, handle one row at a time + for (int y = 0; y < height; ++y) { + // Do 4-tap intra interpolation filtering + __m128i vp = _mm_loadu_si128((__m128i*) &ref_main[y + ref_offset]); + __m256i vp256 = _mm256_inserti128_si256(_mm256_castsi128_si256(vp), vp, 1); - __m128i sad1 = _mm_shuffle_epi32 (sad0, _MM_SHUFFLE(1, 0, 3, 2)); - __m128i sad2 = _mm_add_epi64 (sad0, sad1); - __m128i sad3 = _mm_add_epi64 (sad2, sixteen); + __m256i vp0 = _mm256_shuffle_epi8(vp256, vpshuf0); + __m256i vp1 = _mm256_shuffle_epi8(vp256, vpshuf1); - __m128i dc_64 = _mm_srli_epi64 (sad3, 5); - __m256i dc_8 = _mm256_broadcastb_epi8 (dc_64); + __m256i vmadd0 = _mm256_maddubs_epi16(vp0, vw0); + __m256i vmadd1 = _mm256_maddubs_epi16(vp1, vw1); + __m256i sum = _mm256_hadd_epi16(vmadd0, vmadd1); + sum = _mm256_add_epi16(sum, v32s); + sum = _mm256_srai_epi16(sum, 6); - __m256i rt = _mm256_cvtepu8_epi16 (rt_128); - __m256i rl = _mm256_cvtepu8_epi16 (rl_128); + __m128i lo = _mm256_castsi256_si128(sum); + __m128i hi = _mm256_extracti128_si256(sum, 1); + __m128i packed = _mm_packus_epi16(lo, hi); + packed = 
_mm_shuffle_epi32(packed, _MM_SHUFFLE(3, 1, 2, 0)); - uint8_t rl0 = *(uint8_t *)(ref_left + 1); - __m256i rl_r0 = cvt_u32_si256((uint32_t)rl0); + _mm_store_si128((__m128i*)(dst + (y * width + x)), packed); + } + } +} - __m256i rlrt_r0 = _mm256_add_epi16(rl_r0, rt); - __m256i dc_addend = _mm256_unpacklo_epi8(dc_8, twos); - __m256i r0 = _mm256_maddubs_epi16(dc_addend, mult_r0); - __m256i left_dcs = _mm256_maddubs_epi16(dc_addend, mult_left); +// Linear interpolation filter for width 4 has a different call, since it uses premade tables for coefficients +static void angular_pred_linear_filter_w4_ver_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int, const int32_t pred_mode) +{ + const int16_t* dint = delta_int; + const __m128i v16s = _mm_set1_epi16(16); + + const int mode_idx = pred_mode <= 34 ? pred_mode - 2 : 66 - pred_mode; + const int weight_table_offset = coeff_table_mode_offsets[mode_idx]; + const int vnum = coeff_vector128_num_by_mode[mode_idx]; + const int modulo = vnum - 1; + int offset_num = 0; + + ALIGNED(16) int16_t shuffle_vector_offsets[8]; + memcpy(shuffle_vector_offsets, &intra_chroma_linear_interpolation_w4_ver_shuffle_vector_offset[mode_idx * 8], sizeof(int16_t) * 8); + + // Height has to be at least 4, handle 4 lines at once + for (int y = 0; y < height; y += 4) { + // Load refs from smallest index onwards, shuffle will handle the rest. The smallest index will be at one of these delta int table indices + const int16_t min_offset = 1 + MIN(dint[0], dint[3]); + dint += 4; + // Load enough reff samples to cover four 4 width lines. Shuffles will put the samples in correct places. + const __m128i vsrc_raw = _mm_loadu_si128((const __m128i*) & ref[min_offset]); + const int offset = weight_table_offset + (offset_num * 16); + + const __m128i vcoeff0 = _mm_load_si128((const __m128i*)&intra_chroma_linear_interpolation_weights_w4_ver[offset]); + const __m128i vcoeff1 = vnum == 1 ? vcoeff0 : _mm_load_si128((const __m128i*)&intra_chroma_linear_interpolation_weights_w4_ver[offset + 16]); - r0 = _mm256_add_epi16 (r0, rlrt_r0); - r0 = _mm256_srli_epi16 (r0, 2); - __m256i r0r0 = _mm256_packus_epi16 (r0, r0); - r0r0 = _mm256_permute4x64_epi64(r0r0, _MM_SHUFFLE(3, 1, 2, 0)); + const __m128i vshuf0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w4_ver[shuffle_vector_offsets[y >> 2] + 0]); + const __m128i vshuf1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w4_ver[shuffle_vector_offsets[y >> 2] + 16]); - __m256i leftmosts = _mm256_add_epi16 (left_dcs, rl); - leftmosts = _mm256_srli_epi16 (leftmosts, 2); + __m128i vsrc0 = _mm_shuffle_epi8(vsrc_raw, vshuf0); + __m128i vsrc1 = _mm_shuffle_epi8(vsrc_raw, vshuf1); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff0); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff1); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + + _mm_store_si128((__m128i*)dst, _mm_packus_epi16(res0, res1)); + dst += 16; + offset_num += 2; + // This resets the offset number to 0 when it reaches the end of the table. Only works on powers of 2. 
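+    // For example, vnum == 4 gives modulo == 3: offset_num steps 0, 2 and
+    // then wraps back to 0, consuming both 16-byte coefficient pairs in turn.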
+ offset_num &= modulo; + } +} - // Contain the leftmost column's bytes in both lanes of lm_8 - __m256i lm_8 = _mm256_packus_epi16 (leftmosts, zero); - lm_8 = _mm256_permute4x64_epi64(lm_8, _MM_SHUFFLE(2, 0, 2, 0)); +static void angular_pred_linear_filter_w8_ver_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int, const int pred_mode) +{ + const int width = 8; + const __m128i v16s = _mm_set1_epi16(16); + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08 + ); + const int wide_angle = pred_mode > 66 || pred_mode < 2; + const int mode_idx = wide_angle ? (pred_mode < 2 ? 12 + pred_mode : 80 - pred_mode) : (pred_mode <= 34 ? (pred_mode - 2) : (66 - pred_mode)); + const int8_t* coeff_table = wide_angle ? intra_chroma_linear_interpolation_weights_w8_ver_wide_angle : intra_chroma_linear_interpolation_weights_w8_ver; + const int coeff_table_offset = mode_idx * 64; + + // Height has to be at least 2, handle 2 lines at once + for (int y = 0; y < height; y += 2) { + const int16_t* coeff_tmp0 = (const int16_t*) &coeff_table[coeff_table_offset + (y << 1) + 0]; + const int16_t* coeff_tmp1 = (const int16_t*) &coeff_table[coeff_table_offset + (y << 1) + 2]; + + __m128i vsrc0 = _mm_loadu_si128((const __m128i*) & ref[delta_int[y + 0] + 1]); + __m128i vsrc1 = _mm_loadu_si128((const __m128i*) & ref[delta_int[y + 1] + 1]); - __m256i lm8_r1 = _mm256_srli_epi32 (lm_8, 8); - __m256i r1r1 = _mm256_blendv_epi8 (dc_8, lm8_r1, lm8_bmask); - __m256i r0r1 = _mm256_blend_epi32 (r0r0, r1r1, 0xf0); + vsrc0 = _mm_shuffle_epi8(vsrc0, vshuf); + vsrc1 = _mm_shuffle_epi8(vsrc1, vshuf); - _mm256_storeu_si256((__m256i *)out_block, r0r1); + const __m128i vcoeff0 = _mm_set1_epi16(*coeff_tmp0); + const __m128i vcoeff1 = _mm_set1_epi16(*coeff_tmp1); - // Starts from 2 because row 0 (and row 1) is handled separately - __m256i lm8_l = _mm256_bsrli_epi128 (lm_8, 2); - __m256i lm8_h = _mm256_bsrli_epi128 (lm_8, 3); - lm_8 = _mm256_blend_epi32 (lm8_l, lm8_h, 0xf0); + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff0); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff1); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); - for (uint32_t y = 2; y < 16; y += 2) { - __m256i curr_row = _mm256_blendv_epi8 (dc_8, lm_8, lm8_bmask); - _mm256_storeu_si256((__m256i *)(out_block + (y << 4)), curr_row); - lm_8 = _mm256_bsrli_epi128(lm_8, 2); + _mm_store_si128((__m128i*)dst, _mm_packus_epi16(res0, res1)); + dst += 16; } } -static void pred_filtered_dc_32x32(const uint8_t *ref_top, - const uint8_t *ref_left, - uint8_t *out_block, - const uint8_t multi_ref_idx) +static void angular_pred_linear_filter_w16_ver_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int, const int pred_mode) { - const __m256i rt = _mm256_loadu_si256((const __m256i *)(ref_top + 1)); - const __m256i rl = _mm256_loadu_si256((const __m256i *)(ref_left + 1)); + const __m128i v16s = _mm_set1_epi16(16); + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08 + ); - const __m256i zero = _mm256_setzero_si256(); - const __m256i twos = _mm256_set1_epi8(2); + const int wide_angle = pred_mode > 66 || pred_mode < 2; + const int mode_idx = wide_angle ? (pred_mode < 2 ? 12 + pred_mode : 80 - pred_mode) : (pred_mode <= 34 ? 
(pred_mode - 2) : (66 - pred_mode)); + const int8_t* coeff_table = wide_angle ? intra_chroma_linear_interpolation_weights_w8_ver_wide_angle : intra_chroma_linear_interpolation_weights_w8_ver; + const int coeff_table_offset = mode_idx * 64; - const __m256i mult_r0lo = _mm256_setr_epi32(0x01030102, 0x01030103, - 0x01030103, 0x01030103, - 0x01030103, 0x01030103, - 0x01030103, 0x01030103); - - const __m256i mult_left = _mm256_set1_epi16(0x0103); - const __m256i lm8_bmask = cvt_u32_si256 (0xff); - - const __m256i bshif_msk = _mm256_setr_epi32(0x04030201, 0x08070605, - 0x0c0b0a09, 0x800f0e0d, - 0x03020100, 0x07060504, - 0x0b0a0908, 0x0f0e0d0c); - __m256i debias = cvt_u32_si256(32); - __m256i sad0_t = _mm256_sad_epu8 (rt, zero); - __m256i sad0_l = _mm256_sad_epu8 (rl, zero); - __m256i sad0 = _mm256_add_epi64 (sad0_t, sad0_l); - - __m256i sad1 = _mm256_permute4x64_epi64(sad0, _MM_SHUFFLE(1, 0, 3, 2)); - __m256i sad2 = _mm256_add_epi64 (sad0, sad1); - __m256i sad3 = _mm256_shuffle_epi32 (sad2, _MM_SHUFFLE(1, 0, 3, 2)); - __m256i sad4 = _mm256_add_epi64 (sad2, sad3); - __m256i sad5 = _mm256_add_epi64 (sad4, debias); - __m256i dc_64 = _mm256_srli_epi64 (sad5, 6); - - __m128i dc_64_ = _mm256_castsi256_si128 (dc_64); - __m256i dc_8 = _mm256_broadcastb_epi8 (dc_64_); - - __m256i rtlo = _mm256_unpacklo_epi8 (rt, zero); - __m256i rllo = _mm256_unpacklo_epi8 (rl, zero); - __m256i rthi = _mm256_unpackhi_epi8 (rt, zero); - __m256i rlhi = _mm256_unpackhi_epi8 (rl, zero); - - __m256i dc_addend = _mm256_unpacklo_epi8 (dc_8, twos); - __m256i r0lo = _mm256_maddubs_epi16 (dc_addend, mult_r0lo); - __m256i r0hi = _mm256_maddubs_epi16 (dc_addend, mult_left); - __m256i c0dc = r0hi; - - r0lo = _mm256_add_epi16 (r0lo, rtlo); - r0hi = _mm256_add_epi16 (r0hi, rthi); - - __m256i rlr0 = _mm256_blendv_epi8 (zero, rl, lm8_bmask); - r0lo = _mm256_add_epi16 (r0lo, rlr0); - - r0lo = _mm256_srli_epi16 (r0lo, 2); - r0hi = _mm256_srli_epi16 (r0hi, 2); - __m256i r0 = _mm256_packus_epi16 (r0lo, r0hi); - - _mm256_storeu_si256((__m256i *)out_block, r0); - - __m256i c0lo = _mm256_add_epi16 (c0dc, rllo); - __m256i c0hi = _mm256_add_epi16 (c0dc, rlhi); - c0lo = _mm256_srli_epi16 (c0lo, 2); - c0hi = _mm256_srli_epi16 (c0hi, 2); - - __m256i c0 = _mm256_packus_epi16 (c0lo, c0hi); - - // r0 already handled! - for (uint32_t y = 1; y < 32; y++) { - if (y == 16) { - c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); - } else { - c0 = _mm256_shuffle_epi8 (c0, bshif_msk); - } - __m256i curr_row = _mm256_blendv_epi8 (dc_8, c0, lm8_bmask); - _mm256_storeu_si256(((__m256i *)out_block) + y, curr_row); + // Handle 1 line at a time + for (int y = 0; y < height; ++y) { + const int16_t* coeff_tmp = (const int16_t*)&coeff_table[coeff_table_offset + (y << 1)]; + __m128i vcoeff = _mm_set1_epi16(*coeff_tmp); + + __m128i vsrc0 = _mm_loadu_si128((const __m128i*)&ref[delta_int[y] + 0 + 1]); + __m128i vsrc1 = _mm_loadu_si128((const __m128i*)&ref[delta_int[y] + 8 + 1]); + + vsrc0 = _mm_shuffle_epi8(vsrc0, vshuf); + vsrc1 = _mm_shuffle_epi8(vsrc1, vshuf); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + + _mm_store_si128((__m128i*)dst, _mm_packus_epi16(res0, res1)); + dst += 16; } } -/** -* \brief Generage intra DC prediction with post filtering applied. -* \param log2_width Log2 of width, range 2..5. 
-* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. -* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. -* \param dst Buffer of size width*width. -* \param multi_ref_idx Reference line index. May be non-zero when MRL is used. -*/ -static void uvg_intra_pred_filtered_dc_avx2( - const int_fast8_t log2_width, - const uint8_t *ref_top, - const uint8_t *ref_left, - uint8_t *out_block, - const uint8_t multi_ref_idx) +static void angular_pred_linear_filter_w32_ver_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int, const int pred_mode) { - assert(log2_width >= 2 && log2_width <= 5); + const __m256i v16s = _mm256_set1_epi16(16); + const __m256i vshuf = _mm256_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08 + ); - // TODO: implement multi reference index for all subfunctions - if (log2_width == 2) { - pred_filtered_dc_4x4(ref_top, ref_left, out_block, multi_ref_idx); - } else if (log2_width == 3) { - pred_filtered_dc_8x8(ref_top, ref_left, out_block, multi_ref_idx); - } else if (log2_width == 4) { - pred_filtered_dc_16x16(ref_top, ref_left, out_block, multi_ref_idx); - } else if (log2_width == 5) { - pred_filtered_dc_32x32(ref_top, ref_left, out_block, multi_ref_idx); + const int wide_angle = pred_mode > 66 || pred_mode < 2; + const int mode_idx = wide_angle ? (pred_mode < 2 ? 12 + pred_mode : 80 - pred_mode) : (pred_mode <= 34 ? (pred_mode - 2) : (66 - pred_mode)); + const int8_t* coeff_table = wide_angle ? intra_chroma_linear_interpolation_weights_w8_ver_wide_angle : intra_chroma_linear_interpolation_weights_w8_ver; + const int coeff_table_offset = mode_idx * 64; + + // Handle 1 line at a time + for (int y = 0; y < height; ++y) { + const int16_t* coeff_tmp = (const int16_t*)&coeff_table[coeff_table_offset + (y << 1)]; + __m256i vcoeff = _mm256_set1_epi16(*coeff_tmp); + + ALIGNED(32) __m128i vsrc[4]; + vsrc[0] = _mm_loadu_si128((const __m128i*) & ref[delta_int[y] + 0 + 1]); + vsrc[1] = _mm_loadu_si128((const __m128i*) & ref[delta_int[y] + 16 + 1]); // Flip these two middle sources. They will be later flipped back into place by packus + vsrc[2] = _mm_loadu_si128((const __m128i*) & ref[delta_int[y] + 8 + 1]); + vsrc[3] = _mm_loadu_si128((const __m128i*) & ref[delta_int[y] + 24 + 1]); + + __m256i* vsrc256 = (__m256i*)vsrc; + vsrc256[0] = _mm256_shuffle_epi8(vsrc256[0], vshuf); + vsrc256[1] = _mm256_shuffle_epi8(vsrc256[1], vshuf); + + __m256i res0 = _mm256_maddubs_epi16(vsrc256[0], vcoeff); + __m256i res1 = _mm256_maddubs_epi16(vsrc256[1], vcoeff); + res0 = _mm256_add_epi16(res0, v16s); + res1 = _mm256_add_epi16(res1, v16s); + res0 = _mm256_srai_epi16(res0, 5); + res1 = _mm256_srai_epi16(res1, 5); + + _mm256_store_si256((__m256i*)dst, _mm256_packus_epi16(res0, res1)); + dst += 32; } } -// TODO: update all ranges (in comments, etc.) from HEVC to VVC -/** -* \brief Position Dependent Prediction Combination for Planar and DC modes. -* \param log2_width Log2 of width, range 2..5. -* \param width Block width matching log2_width. -* \param used_ref Pointer used reference pixel struct. -* \param dst Buffer of size width*width. 
-*/ -static void uvg_pdpc_planar_dc_avx2( - const int mode, - const cu_loc_t* const cu_loc, - const color_t color, - const uvg_intra_ref *const used_ref, - uvg_pixel *const dst) +static void angular_pred_linear_filter_w4_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int) { - // ISP_TODO: non-square block implementation, height is passed but not used - assert(mode == 0 || mode == 1); // planar or DC - const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; - const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height; - const int log2_width = uvg_g_convert_to_log2[width]; - const int log2_height = uvg_g_convert_to_log2[height]; + const int16_t* dint = delta_int; + const __m128i v16s = _mm_set1_epi16(16); + + const int16_t weigth_offset = mode_to_weight_table_offset_w4_hor[mode]; + const int16_t shuf_offset = mode_to_shuffle_vector_table_offset_w4_hor[mode]; + + __m128i vcoeff = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w4_hor[weigth_offset]); + __m128i vshuf0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w4_hor[shuf_offset + 0]); + __m128i vshuf1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w4_hor[shuf_offset + 16]); + + // Load refs from smallest index onwards, shuffle will handle the rest. The smallest index will be at one of these delta int table indices + const int16_t min_offset = 1 + MIN(dint[0], dint[3]); + + // Height has to be at least 4, handle 4 lines at once + for (int y = 0; y < height; y += 4) { + // Prepare sources + __m128i vidx = _mm_set_epi64x((long long int)(min_offset + y + 2), (long long int)(min_offset + y + 0)); + __m128i vsrc_tmp = _mm_i64gather_epi64((const long long*)ref, vidx, 1); + __m128i vsrc0 = _mm_shuffle_epi8(vsrc_tmp, vshuf0); + __m128i vsrc1 = _mm_shuffle_epi8(vsrc_tmp, vshuf1); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + + _mm_store_si128((__m128i*)dst, _mm_packus_epi16(res0, res1)); + dst += 16; + } +} - __m256i shuf_mask_byte = _mm256_setr_epi8( - 0, -1, 0, -1, 0, -1, 0, -1, - 1, -1, 1, -1, 1, -1, 1, -1, - 2, -1, 2, -1, 2, -1, 2, -1, - 3, -1, 3, -1, 3, -1, 3, -1 - ); +static void angular_pred_linear_filter_w8_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int) +{ + const int16_t* dint = delta_int; + const __m128i v16s = _mm_set1_epi16(16); + const int16_t weigth_offset = (mode - 2) * 16; + const int16_t shuf_offset = (mode - 2) * 32; + + __m128i vcoeff = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_weights_w8_hor[weigth_offset]); + __m128i vshuf0 = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_shuffle_vectors_w8_hor[shuf_offset + 0]); + __m128i vshuf1 = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_shuffle_vectors_w8_hor[shuf_offset + 16]); + + // Load refs from smallest index onwards, shuffle will handle the rest. 
+  const int16_t min_offset = 1 + MIN(dint[0], dint[7]);
+
+  // Height has to be at least 2, handle 2 lines at once
+  for (int y = 0; y < height; y += 2) {
+    // Prepare sources
+    __m128i vsrc_tmp = _mm_loadu_si128((__m128i*)&ref[min_offset + y]);
+    const __m128i vsrc0 = _mm_shuffle_epi8(vsrc_tmp, vshuf0);
+    const __m128i vsrc1 = _mm_shuffle_epi8(vsrc_tmp, vshuf1);
+
+    __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff);
+    __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff);
+    res0 = _mm_add_epi16(res0, v16s);
+    res1 = _mm_add_epi16(res1, v16s);
+    res0 = _mm_srai_epi16(res0, 5);
+    res1 = _mm_srai_epi16(res1, 5);
+
+    _mm_store_si128((__m128i*)dst, _mm_packus_epi16(res0, res1));
+    dst += 16;
+  }
+}
-  __m256i shuf_mask_word = _mm256_setr_epi8(
-    0, 1, 0, 1, 0, 1, 0, 1,
-    2, 3, 2, 3, 2, 3, 2, 3,
-    4, 5, 4, 5, 4, 5, 4, 5,
-    6, 7, 6, 7, 6, 7, 6, 7
-  );
+static void angular_pred_linear_filter_w16_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int)
+{
+  const int16_t* dint = delta_int;
+  const __m128i v16s = _mm_set1_epi16(16);
+  const int16_t weight_offset = (mode - 2) * 32;
+  const int16_t shuf_offset = (mode - 2) * 64;
+
+  __m128i vcoeff0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w16_hor[weight_offset + 0]);
+  __m128i vcoeff1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w16_hor[weight_offset + 16]);
+  __m128i vshuf0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w16_hor[shuf_offset + 0]);
+  __m128i vshuf1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w16_hor[shuf_offset + 16]);
+  __m128i vshuf2 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w16_hor[shuf_offset + 32]);
+  __m128i vshuf3 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w16_hor[shuf_offset + 48]);
+
+  // Load refs from the smallest index onwards; the shuffle will handle the rest. The smallest index will be at one of these delta int table indices.
+  const int16_t min_offset0 = 1 + MIN(dint[0], dint[7]);
+  const int16_t min_offset1 = 1 + MIN(dint[8], dint[15]);
+
+  // Height has to be at least 2; there is no 16x1 block for chroma.
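+  // Each output pixel is a two-tap linear blend. In scalar terms (illustrative
+  // only): dst[x] = (a * (32 - f) + b * f + 16) >> 5, where a and b are adjacent
+  // reference samples and f is the fractional weight baked into the coefficient
+  // table; maddubs computes the per-pixel products and pair sums in one step.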
+  for (int y = 0; y < height; y += 2) {
+    // Prepare sources
+    __m128i vsrc_tmp0 = _mm_loadu_si128((__m128i*) &ref[min_offset0 + y]);
+    __m128i vsrc_tmp1 = _mm_loadu_si128((__m128i*) &ref[min_offset1 + y]);
+    const __m128i vsrc0 = _mm_shuffle_epi8(vsrc_tmp0, vshuf0);
+    const __m128i vsrc1 = _mm_shuffle_epi8(vsrc_tmp1, vshuf1);
+    const __m128i vsrc2 = _mm_shuffle_epi8(vsrc_tmp0, vshuf2);
+    const __m128i vsrc3 = _mm_shuffle_epi8(vsrc_tmp1, vshuf3);
+
+    __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff0);
+    __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff1);
+    __m128i res2 = _mm_maddubs_epi16(vsrc2, vcoeff0);
+    __m128i res3 = _mm_maddubs_epi16(vsrc3, vcoeff1);
+    res0 = _mm_add_epi16(res0, v16s);
+    res1 = _mm_add_epi16(res1, v16s);
+    res2 = _mm_add_epi16(res2, v16s);
+    res3 = _mm_add_epi16(res3, v16s);
+    res0 = _mm_srai_epi16(res0, 5);
+    res1 = _mm_srai_epi16(res1, 5);
+    res2 = _mm_srai_epi16(res2, 5);
+    res3 = _mm_srai_epi16(res3, 5);
+
+    _mm_store_si128((__m128i*)&dst[0],  _mm_packus_epi16(res0, res1));
+    _mm_store_si128((__m128i*)&dst[16], _mm_packus_epi16(res2, res3));
+    dst += 32;
+  }
+}
-  // TODO: replace latter log2_width with log2_height
-  const int scale = ((log2_width - 2 + log2_width - 2 + 2) >> 2);
+static void angular_pred_linear_filter_w32_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int)
+{
+  const int16_t* dint = delta_int;
+  const __m128i v16s = _mm_set1_epi16(16);
+  const int16_t weight_offset = (mode - 2) * 64;
+  const int16_t shuf_offset = (mode - 2) * 64;
+
+  __m128i vcoeff0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w32_hor[weight_offset + 0]);
+  __m128i vcoeff1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w32_hor[weight_offset + 16]);
+  __m128i vcoeff2 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w32_hor[weight_offset + 32]);
+  __m128i vcoeff3 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w32_hor[weight_offset + 48]);
+  __m128i vshuf0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w32_hor[shuf_offset + 0]);
+  __m128i vshuf1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w32_hor[shuf_offset + 16]);
+  __m128i vshuf2 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w32_hor[shuf_offset + 32]);
+  __m128i vshuf3 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_shuffle_vectors_w32_hor[shuf_offset + 48]);
+
+  // Load refs from the smallest index onwards; the shuffle will handle the rest. The smallest index will be at one of these delta int table indices.
+  // Due to width, two loads are needed, and therefore two offsets. Cannot use 256-bit loads due to alignment issues.
+  const int16_t min_offset0 = 1 + MIN(dint[0], dint[15]);
+  const int16_t min_offset1 = 1 + MIN(dint[16], dint[31]);
+
+  // Height has to be at least 2. 
Due to width, handle 1 line at a time + for (int y = 0; y < height; ++y) { + // Prepare sources + __m128i vsrc_tmp0 = _mm_loadu_si128((__m128i*) &ref[min_offset0 + y]); + __m128i vsrc_tmp1 = _mm_loadu_si128((__m128i*) &ref[min_offset1 + y]); + __m128i vsrc0 = _mm_shuffle_epi8(vsrc_tmp0, vshuf0); + __m128i vsrc1 = _mm_shuffle_epi8(vsrc_tmp0, vshuf1); + __m128i vsrc2 = _mm_shuffle_epi8(vsrc_tmp1, vshuf2); + __m128i vsrc3 = _mm_shuffle_epi8(vsrc_tmp1, vshuf3); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff0); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff1); + __m128i res2 = _mm_maddubs_epi16(vsrc2, vcoeff2); + __m128i res3 = _mm_maddubs_epi16(vsrc3, vcoeff3); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res2 = _mm_add_epi16(res2, v16s); + res3 = _mm_add_epi16(res3, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + res2 = _mm_srai_epi16(res2, 5); + res3 = _mm_srai_epi16(res3, 5); + + _mm_store_si128((__m128i*)&dst[0], _mm_packus_epi16(res0, res1)); + _mm_store_si128((__m128i*)&dst[16], _mm_packus_epi16(res2, res3)); + dst += 32; + } +} - // Same weights regardless of axis, compute once - int16_t w[LCU_WIDTH]; - for (int i = 0; i < width; i += 4) { - __m128i base = _mm_set1_epi32(i); - __m128i offs = _mm_setr_epi32(0, 1, 2, 3); - __m128i idxs = _mm_add_epi32(base, offs); - __m128i unclipped = _mm_slli_epi32(idxs, 1); - unclipped = _mm_srli_epi32(unclipped, scale); - __m128i clipped = _mm_min_epi32( _mm_set1_epi32(31), unclipped); - __m128i weights = _mm_srlv_epi32(_mm_set1_epi32(32), clipped); - weights = _mm_packus_epi32(weights, weights); - _mm_storel_epi64((__m128i*)&w[i], weights); + +static void angular_pred_linear_filter_w4_hor_wide_angle_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int) +{ + const __m128i v16s = _mm_set1_epi16(16); + + const int mode_idx = mode < 2 ? 
mode + 12 : 80 - mode; + const int table_offset = mode_idx * 128; + + const __m128i vcoeff0 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + 0]); + const __m128i vcoeff1 = _mm_load_si128((const __m128i*) &intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + 16]); + + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c + ); + + const __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + const __m256i vidx = _mm256_setr_epi64x(delta_int[0], delta_int[1], delta_int[2], delta_int[3]); + + // Height has to be at least 4, handle 4 lines at once + for (int y = 0; y < height; y += 4) { + const __m256i vsrc_raw = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx, 1); + + __m128i vsrc0 = _mm256_extracti128_si256(vsrc_raw, 0); + __m128i vsrc1 = _mm256_extracti128_si256(vsrc_raw, 1); + + vsrc0 = _mm_shuffle_epi8(vsrc0, vshuf); + vsrc1 = _mm_shuffle_epi8(vsrc1, vshuf); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff0); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff1); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + __m128i vfinal = _mm_packus_epi16(res0, res1); + vfinal = _mm_shuffle_epi8(vfinal, vtranspose); + + _mm_store_si128((__m128i*)dst, vfinal); + dst += 16; } +} - // Process in 4x4 blocks - // TODO: replace width with height - for (int y = 0; y < width; y += 4) { - for (int x = 0; x < width; x += 4) { +static void angular_pred_linear_filter_w8_hor_wide_angle_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int, const int16_t* delta_fract) +{ + const int width = 8; + const __m128i v16s = _mm_set1_epi16(16); - uint32_t dw_left; - uint32_t dw_top; - memcpy(&dw_left, &used_ref->left[y + 1], sizeof(dw_left)); - memcpy(&dw_top , &used_ref->top [x + 1], sizeof(dw_top)); - __m256i vleft = _mm256_set1_epi32(dw_left); - __m256i vtop = _mm256_set1_epi32(dw_top); - vleft = _mm256_shuffle_epi8(vleft, shuf_mask_byte); - vtop = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(vtop)); + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c + ); - __m128i vseq = _mm_setr_epi32(0, 1, 2, 3); - __m128i vidx = _mm_slli_epi32(vseq, log2_width); - __m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width + x), vidx, 1); - __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); - uint64_t quad_wL; - uint64_t quad_wT; - memcpy(&quad_wL, &w[x], sizeof(quad_wL)); - memcpy(&quad_wT, &w[y], sizeof(quad_wT)); - __m256i vwL = _mm256_set1_epi64x(quad_wL); - __m256i vwT = _mm256_set1_epi64x(quad_wT); - vwT = _mm256_shuffle_epi8(vwT, shuf_mask_word); - __m256i diff_left = _mm256_sub_epi16(vleft, vdst16); - __m256i diff_top = _mm256_sub_epi16(vtop , vdst16); - __m256i prod_left = _mm256_mullo_epi16(vwL, diff_left); - __m256i prod_top = _mm256_mullo_epi16(vwT, diff_top); - __m256i accu = _mm256_add_epi16(prod_left, prod_top); - accu = _mm256_add_epi16(accu, _mm256_set1_epi16(32)); + const __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + const __m128i vidxshuf = _mm_setr_epi8(0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff); // Don't care + __m128i vidx_raw = _mm_load_si128((__m128i*)delta_int); + + const __m256i vidx0 = _mm256_cvtepi16_epi64(vidx_raw); + vidx_raw = _mm_shuffle_epi8(vidx_raw, vidxshuf); + const __m256i vidx1 = _mm256_cvtepi16_epi64(vidx_raw); + + const int mode_idx = mode < 2 ? mode + 12 : 80 - mode; + const int table_offset = mode_idx * 128; + + const __m128i vcoeff0 = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + 0]); + const __m128i vcoeff1 = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + 16]); + const __m128i vcoeff2 = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + 32]); + const __m128i vcoeff3 = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + 48]); + + // Height has to be at least 2. Handle as 4x4 blocks. Special handling needed when height == 2. + // TODO: make sure this function is not called when height is 2. + for (int y = 0; y < height; y += 4) { + const __m256i vsrc_raw0 = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx0, 1); + const __m256i vsrc_raw1 = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx1, 1); + + __m128i vsrc0 = _mm256_extracti128_si256(vsrc_raw0, 0); + __m128i vsrc1 = _mm256_extracti128_si256(vsrc_raw0, 1); + __m128i vsrc2 = _mm256_extracti128_si256(vsrc_raw1, 0); + __m128i vsrc3 = _mm256_extracti128_si256(vsrc_raw1, 1); + + vsrc0 = _mm_shuffle_epi8(vsrc0, vshuf); + vsrc1 = _mm_shuffle_epi8(vsrc1, vshuf); + vsrc2 = _mm_shuffle_epi8(vsrc2, vshuf); + vsrc3 = _mm_shuffle_epi8(vsrc3, vshuf); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff0); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff1); + __m128i res2 = _mm_maddubs_epi16(vsrc2, vcoeff2); + __m128i res3 = _mm_maddubs_epi16(vsrc3, vcoeff3); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res2 = _mm_add_epi16(res2, v16s); + res3 = _mm_add_epi16(res3, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + res2 = _mm_srai_epi16(res2, 5); + res3 = _mm_srai_epi16(res3, 5); + + __m128i vtmp0 = _mm_packus_epi16(res0, res1); + __m128i vtmp1 = _mm_packus_epi16(res2, res3); + vtmp0 = _mm_shuffle_epi8(vtmp0, vtranspose); + vtmp1 = _mm_shuffle_epi8(vtmp1, vtranspose); + + __m128i vfinal0 = _mm_unpacklo_epi32(vtmp0, vtmp1); + __m128i vfinal1 = _mm_unpackhi_epi32(vtmp0, vtmp1); + + + _mm_store_si128((__m128i*)&dst[0], vfinal0); + _mm_store_si128((__m128i*)&dst[16], vfinal1); + dst += 32; + } +} + +static void angular_pred_linear_filter_w16_hor_wide_angle_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int mode, const int16_t* delta_int, const int16_t* delta_fract) +{ + const int width = 16; + const __m128i v16s = _mm_set1_epi16(16); + + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c + ); + + const __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + const __m128i vidxshuf = _mm_setr_epi8(0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff); // Don't care + + __m128i vidx_raw0 = _mm_load_si128((__m128i*) & delta_int[0]); + __m128i vidx_raw1 = _mm_load_si128((__m128i*) & delta_int[8]); + + __m256i vidx[4]; + vidx[0] = _mm256_cvtepi16_epi64(vidx_raw0); + vidx_raw0 = 
_mm_shuffle_epi8(vidx_raw0, vidxshuf); + vidx[1] = _mm256_cvtepi16_epi64(vidx_raw0); + + vidx[2] = _mm256_cvtepi16_epi64(vidx_raw1); + vidx_raw1 = _mm_shuffle_epi8(vidx_raw1, vidxshuf); + vidx[3] = _mm256_cvtepi16_epi64(vidx_raw1); + + const int mode_idx = mode < 2 ? mode + 12 : 80 - mode; + const int table_offset = mode_idx * 128; + + __m128i vcoeff[8]; + for (int i = 0, o = 0; i < 8; ++i, o += 16) { + vcoeff[i] = _mm_load_si128((const __m128i*) & intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[table_offset + o]); + } + + // Height has to be at least 2. Handle as 4x4 blocks. Special handling needed when height < 4. + // TODO: make sure this function is not called when height is less than 4. + for (int y = 0; y < height; y += 4) { + __m128i vtmp[4]; + for (int x = 0, v = 0, c = 0; x < width; x += 8, v += 2, c += 4) { + const __m256i vsrc_raw0 = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx[v + 0], 1); + const __m256i vsrc_raw1 = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx[v + 1], 1); + + __m128i vsrc0 = _mm256_extracti128_si256(vsrc_raw0, 0); + __m128i vsrc1 = _mm256_extracti128_si256(vsrc_raw0, 1); + __m128i vsrc2 = _mm256_extracti128_si256(vsrc_raw1, 0); + __m128i vsrc3 = _mm256_extracti128_si256(vsrc_raw1, 1); + + vsrc0 = _mm_shuffle_epi8(vsrc0, vshuf); + vsrc1 = _mm_shuffle_epi8(vsrc1, vshuf); + vsrc2 = _mm_shuffle_epi8(vsrc2, vshuf); + vsrc3 = _mm_shuffle_epi8(vsrc3, vshuf); + + __m128i res0 = _mm_maddubs_epi16(vsrc0, vcoeff[c + 0]); + __m128i res1 = _mm_maddubs_epi16(vsrc1, vcoeff[c + 1]); + __m128i res2 = _mm_maddubs_epi16(vsrc2, vcoeff[c + 2]); + __m128i res3 = _mm_maddubs_epi16(vsrc3, vcoeff[c + 3]); + res0 = _mm_add_epi16(res0, v16s); + res1 = _mm_add_epi16(res1, v16s); + res2 = _mm_add_epi16(res2, v16s); + res3 = _mm_add_epi16(res3, v16s); + res0 = _mm_srai_epi16(res0, 5); + res1 = _mm_srai_epi16(res1, 5); + res2 = _mm_srai_epi16(res2, 5); + res3 = _mm_srai_epi16(res3, 5); + + vtmp[v + 0] = _mm_packus_epi16(res0, res1); + vtmp[v + 1] = _mm_packus_epi16(res2, res3); + } + vtmp[0] = _mm_shuffle_epi8(vtmp[0], vtranspose); + vtmp[1] = _mm_shuffle_epi8(vtmp[1], vtranspose); + vtmp[2] = _mm_shuffle_epi8(vtmp[2], vtranspose); + vtmp[3] = _mm_shuffle_epi8(vtmp[3], vtranspose); + + __m128i vupk32_lo0 = _mm_unpacklo_epi32(vtmp[0], vtmp[1]); + __m128i vupk32_hi0 = _mm_unpackhi_epi32(vtmp[0], vtmp[1]); + __m128i vupk32_lo1 = _mm_unpacklo_epi32(vtmp[2], vtmp[3]); + __m128i vupk32_hi1 = _mm_unpackhi_epi32(vtmp[2], vtmp[3]); + + __m128i vfinal0 = _mm_unpacklo_epi64(vupk32_lo0, vupk32_lo1); + __m128i vfinal1 = _mm_unpackhi_epi64(vupk32_lo0, vupk32_lo1); + __m128i vfinal2 = _mm_unpacklo_epi64(vupk32_hi0, vupk32_hi1); + __m128i vfinal3 = _mm_unpackhi_epi64(vupk32_hi0, vupk32_hi1); + + _mm_store_si128((__m128i*) & dst[0], vfinal0); + _mm_store_si128((__m128i*) & dst[16], vfinal1); + _mm_store_si128((__m128i*) & dst[32], vfinal2); + _mm_store_si128((__m128i*) & dst[48], vfinal3); + dst += 64; + } +} + +// Used for angles which do not require interpolation. +static void angular_pred_non_fractional_angle_pxl_copy_ver_avx2(uvg_pixel* dst, uvg_pixel* ref, const int width, const int height, const int16_t* delta_int) +{ + // Note: this probably won't work for wide angle modes. 
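+  // No interpolation here: the angle has no fractional part, so each output row
+  // is a straight copy of width reference samples starting at ref[delta_int[y] + 1].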
+  for (int y = 0; y < height; ++y) {
+    uvg_pixel* dst_row = dst + y * width;
+    uvg_pixel* ref_row = ref + delta_int[y] + 1;
+    switch (width) {
+      case 4:  memcpy(dst_row, ref_row,  4 * sizeof(uvg_pixel)); break;
+      case 8:  memcpy(dst_row, ref_row,  8 * sizeof(uvg_pixel)); break;
+      case 16: memcpy(dst_row, ref_row, 16 * sizeof(uvg_pixel)); break;
+      case 32: memcpy(dst_row, ref_row, 32 * sizeof(uvg_pixel)); break;
+      case 64: memcpy(dst_row, ref_row, 64 * sizeof(uvg_pixel)); break;
+    }
+  }
+}
+
+// Horizontal pixel copy for prediction mode 2.
+static void angular_pred_non_fractional_angle_pxl_copy_w4_mode2_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int multi_ref_offset)
+{
+  // const int width = 4;
+
+  const __m128i vrefshuf0 = _mm_setr_epi8(
+    0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04,
+    0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06
+  );
+
+  const __m128i vrefshuf1 = _mm_setr_epi8(
+    0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+    0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a
+  );
+
+  // Handle as 4x4 blocks. There is no case where height < 4.
+  if (height == 4) {
+    // Offset indices by two: one because index 0 is the top-left sample, and one more because delta_int[0] for mode 2 is 1.
+    __m128i vref = _mm_loadu_si128((__m128i*)(&ref[2] + multi_ref_offset));
+    vref = _mm_shuffle_epi8(vref, vrefshuf0);
+
+    _mm_store_si128((__m128i*)dst, vref);
+  }
+  else {
+    // Can handle 8 rows at once
+    for (int y = 0; y < height; y += 8) {
+      __m128i vref = _mm_loadu_si128((__m128i*)(ref + 2 + y + multi_ref_offset));
+
+      __m128i vres0 = _mm_shuffle_epi8(vref, vrefshuf0);
+      __m128i vres1 = _mm_shuffle_epi8(vref, vrefshuf1);
+
+      _mm_store_si128((__m128i*)(dst + 0),  vres0);
+      _mm_store_si128((__m128i*)(dst + 16), vres1);
+      dst += 32;
+    }
+  }
+}
+
+static void angular_pred_non_fractional_angle_pxl_copy_w8_mode2_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int multi_ref_offset)
+{
+  const __m128i vrefshuf0 = _mm_setr_epi8(
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+  );
+
+  const __m128i vrefshuf1 = _mm_setr_epi8(
+    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+    0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a
+  );
+
+  const __m128i vrefshuf2 = _mm_setr_epi8(
+    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+    0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c
+  );
+
+  const __m128i vrefshuf3 = _mm_setr_epi8(
+    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e
+  );
+
+  // Can handle 8 rows at once. There are no height 2 or height 4 cases; this function is not reached for those.
+  for (int y = 0; y < height; y += 8) {
+    // Offset indices by two: one because index 0 is the top-left sample, and one more because delta_int[0] for mode 2 is 1.
+    __m128i vref = _mm_loadu_si128((__m128i*)(ref + 2 + y + multi_ref_offset));
+    _mm_store_si128((__m128i*)(dst + 0),  _mm_shuffle_epi8(vref, vrefshuf0));
+    _mm_store_si128((__m128i*)(dst + 16), _mm_shuffle_epi8(vref, vrefshuf1));
+    _mm_store_si128((__m128i*)(dst + 32), _mm_shuffle_epi8(vref, vrefshuf2));
+    _mm_store_si128((__m128i*)(dst + 48), _mm_shuffle_epi8(vref, vrefshuf3));
+    dst += 64;
+  }
+}
+
+// Horizontal pixel copy for wide-angle modes.
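+// These wide-angle variants gather reference samples column-wise (one sample
+// per output row) and use byte shuffles to transpose small blocks back into
+// row order.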
+static void angular_pred_non_fractional_angle_pxl_copy_w4_wide_angle_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int) +{ + // const int width = 4; + + const __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + //__m128i vidx = _mm_setr_epi32(delta_int[0], delta_int[1], delta_int[2], delta_int[3]); + __m128i vidx = _mm_loadu_si128((__m128i*)delta_int); + vidx = _mm_cvtepi16_epi32(vidx); + + // Handle as 4x4 blocks. There is no case where height < 4. + for (int y = 0; y < height; y += 4) { + // Offset indices by one since index 0 is top left. + + __m128i vref = _mm_i32gather_epi32((const int*)(ref + y + 1), vidx, 1); + + vref = _mm_shuffle_epi8(vref, vtranspose); + + _mm_store_si128((__m128i*)dst, vref); + dst += 16; + } +} + +static void angular_pred_non_fractional_angle_pxl_copy_w8_wide_angle_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int) +{ + // const int width = 8; + + // Place the next 4 16-bit delta int values in the lower half of the register. + const __m128i vidxshuf = _mm_setr_epi8( + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff // Don't care. + ); + + // 1st step of the transpose + const __m256i vtranspose0 = _mm256_setr_epi8( + 0x00, 0x08, 0x01, 0x09, 0x02, 0x0a, 0x03, 0x0b, + 0x04, 0x0c, 0x05, 0x0d, 0x06, 0x0e, 0x07, 0x0f, + 0x00, 0x08, 0x01, 0x09, 0x02, 0x0a, 0x03, 0x0b, + 0x04, 0x0c, 0x05, 0x0d, 0x06, 0x0e, 0x07, 0x0f + ); + + // 3rd step of the transpose, after permute4x64_epi64 + const __m256i vtranspose1 = _mm256_setr_epi8( + 0x00, 0x01, 0x08, 0x09, 0x02, 0x03, 0x0a, 0x0b, + 0x04, 0x05, 0x0c, 0x0d, 0x06, 0x07, 0x0e, 0x0f, + 0x00, 0x01, 0x08, 0x09, 0x02, 0x03, 0x0a, 0x0b, + 0x04, 0x05, 0x0c, 0x0d, 0x06, 0x07, 0x0e, 0x0f + ); + + const __m128i vidx = _mm_loadu_si128((__m128i*)delta_int); + const __m256i vidx0 = _mm256_cvtepi16_epi64(vidx); + const __m256i vidx1 = _mm256_cvtepi16_epi64(_mm_shuffle_epi8(vidx, vidxshuf)); + + // Handle as 8x8 blocks. + for (int y = 0; y < height; y += 8) { + __m256i vref0 = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx0, 1); + __m256i vref1 = _mm256_i64gather_epi64((const long long*)&ref[y + 1], vidx1, 1); + + // Transpose the 8x8 block + vref0 = _mm256_shuffle_epi8(vref0, vtranspose0); + vref1 = _mm256_shuffle_epi8(vref1, vtranspose0); + + vref0 = _mm256_permute4x64_epi64(vref0, _MM_SHUFFLE(3, 1, 2, 0)); + vref1 = _mm256_permute4x64_epi64(vref1, _MM_SHUFFLE(3, 1, 2, 0)); + + vref0 = _mm256_shuffle_epi8(vref0, vtranspose1); + vref1 = _mm256_shuffle_epi8(vref1, vtranspose1); + + __m256i vlo32 = _mm256_unpacklo_epi32(vref0, vref1); + __m256i vhi32 = _mm256_unpackhi_epi32(vref0, vref1); + + __m256i vfinal0 = _mm256_permute2x128_si256(vlo32, vhi32, 0x20); + __m256i vfinal1 = _mm256_permute2x128_si256(vlo32, vhi32, 0x31); + + _mm256_store_si256((__m256i*)(dst + 0), vfinal0); + _mm256_store_si256((__m256i*)(dst + 32), vfinal1); + + dst += 64; + } +} + +static void angular_pred_non_fractional_angle_pxl_copy_w16_wide_angle_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int) +{ + // const int width = 16; + + // Handle as 16x16 blocks. This function can handle widths from 16 onwards. + for (int y = 0; y < height; y += 16) { + // Offset indices by one since ref[0] is top left. 
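+    // Load the 16 source rows, then transpose the 16x16 byte block with four
+    // rounds of unpacks (8-, 16-, 32- and 64-bit interleaves).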
+ __m128i vref0 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x00])); + __m128i vref1 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x01])); + __m128i vref2 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x02])); + __m128i vref3 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x03])); + __m128i vref4 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x04])); + __m128i vref5 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x05])); + __m128i vref6 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x06])); + __m128i vref7 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x07])); + + __m128i vref8 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x08])); + __m128i vref9 = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x09])); + __m128i vrefa = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x0a])); + __m128i vrefb = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x0b])); + __m128i vrefc = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x0c])); + __m128i vrefd = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x0d])); + __m128i vrefe = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x0e])); + __m128i vreff = _mm_loadu_si128((__m128i*)(ref + y + 1 + delta_int[0x0f])); + + // The result is just a transpose of the 16x16 block. + __m128i vlo8_0 = _mm_unpacklo_epi8(vref0, vref1); + __m128i vlo8_1 = _mm_unpacklo_epi8(vref2, vref3); + __m128i vlo8_2 = _mm_unpacklo_epi8(vref4, vref5); + __m128i vlo8_3 = _mm_unpacklo_epi8(vref6, vref7); + __m128i vlo8_4 = _mm_unpacklo_epi8(vref8, vref9); + __m128i vlo8_5 = _mm_unpacklo_epi8(vrefa, vrefb); + __m128i vlo8_6 = _mm_unpacklo_epi8(vrefc, vrefd); + __m128i vlo8_7 = _mm_unpacklo_epi8(vrefe, vreff); + + __m128i vhi8_0 = _mm_unpackhi_epi8(vref0, vref1); + __m128i vhi8_1 = _mm_unpackhi_epi8(vref2, vref3); + __m128i vhi8_2 = _mm_unpackhi_epi8(vref4, vref5); + __m128i vhi8_3 = _mm_unpackhi_epi8(vref6, vref7); + __m128i vhi8_4 = _mm_unpackhi_epi8(vref8, vref9); + __m128i vhi8_5 = _mm_unpackhi_epi8(vrefa, vrefb); + __m128i vhi8_6 = _mm_unpackhi_epi8(vrefc, vrefd); + __m128i vhi8_7 = _mm_unpackhi_epi8(vrefe, vreff); + + __m128i vlo16_0 = _mm_unpacklo_epi16(vlo8_0, vlo8_1); + __m128i vlo16_1 = _mm_unpacklo_epi16(vlo8_2, vlo8_3); + __m128i vlo16_2 = _mm_unpacklo_epi16(vhi8_0, vhi8_1); + __m128i vlo16_3 = _mm_unpacklo_epi16(vhi8_2, vhi8_3); + __m128i vlo16_4 = _mm_unpacklo_epi16(vlo8_4, vlo8_5); + __m128i vlo16_5 = _mm_unpacklo_epi16(vlo8_6, vlo8_7); + __m128i vlo16_6 = _mm_unpacklo_epi16(vhi8_4, vhi8_5); + __m128i vlo16_7 = _mm_unpacklo_epi16(vhi8_6, vhi8_7); + + + __m128i vhi16_0 = _mm_unpackhi_epi16(vlo8_0, vlo8_1); + __m128i vhi16_1 = _mm_unpackhi_epi16(vlo8_2, vlo8_3); + __m128i vhi16_2 = _mm_unpackhi_epi16(vhi8_0, vhi8_1); + __m128i vhi16_3 = _mm_unpackhi_epi16(vhi8_2, vhi8_3); + __m128i vhi16_4 = _mm_unpackhi_epi16(vlo8_4, vlo8_5); + __m128i vhi16_5 = _mm_unpackhi_epi16(vlo8_6, vlo8_7); + __m128i vhi16_6 = _mm_unpackhi_epi16(vhi8_4, vhi8_5); + __m128i vhi16_7 = _mm_unpackhi_epi16(vhi8_6, vhi8_7); + + __m128i vlo32_0 = _mm_unpacklo_epi32(vlo16_0, vlo16_1); + __m128i vlo32_1 = _mm_unpacklo_epi32(vlo16_2, vlo16_3); + __m128i vlo32_2 = _mm_unpacklo_epi32(vhi16_0, vhi16_1); + __m128i vlo32_3 = _mm_unpacklo_epi32(vhi16_2, vhi16_3); + __m128i vlo32_4 = _mm_unpacklo_epi32(vlo16_4, vlo16_5); + __m128i vlo32_5 = _mm_unpacklo_epi32(vlo16_6, vlo16_7); + __m128i vlo32_6 = _mm_unpacklo_epi32(vhi16_4, vhi16_5); + __m128i vlo32_7 = _mm_unpacklo_epi32(vhi16_6, vhi16_7); + + __m128i vhi32_0 = 
_mm_unpackhi_epi32(vlo16_0, vlo16_1);
+    __m128i vhi32_1 = _mm_unpackhi_epi32(vlo16_2, vlo16_3);
+    __m128i vhi32_2 = _mm_unpackhi_epi32(vhi16_0, vhi16_1);
+    __m128i vhi32_3 = _mm_unpackhi_epi32(vhi16_2, vhi16_3);
+    __m128i vhi32_4 = _mm_unpackhi_epi32(vlo16_4, vlo16_5);
+    __m128i vhi32_5 = _mm_unpackhi_epi32(vlo16_6, vlo16_7);
+    __m128i vhi32_6 = _mm_unpackhi_epi32(vhi16_4, vhi16_5);
+    __m128i vhi32_7 = _mm_unpackhi_epi32(vhi16_6, vhi16_7);
+
+    __m128i vrow0 = _mm_unpacklo_epi64(vlo32_0, vlo32_4);
+    __m128i vrow1 = _mm_unpackhi_epi64(vlo32_0, vlo32_4);
+    __m128i vrow2 = _mm_unpacklo_epi64(vhi32_0, vhi32_4);
+    __m128i vrow3 = _mm_unpackhi_epi64(vhi32_0, vhi32_4);
+    __m128i vrow4 = _mm_unpacklo_epi64(vlo32_2, vlo32_6);
+    __m128i vrow5 = _mm_unpackhi_epi64(vlo32_2, vlo32_6);
+    __m128i vrow6 = _mm_unpacklo_epi64(vhi32_2, vhi32_6);
+    __m128i vrow7 = _mm_unpackhi_epi64(vhi32_2, vhi32_6);
+
+    __m128i vrow8 = _mm_unpacklo_epi64(vlo32_1, vlo32_5);
+    __m128i vrow9 = _mm_unpackhi_epi64(vlo32_1, vlo32_5);
+    __m128i vrowa = _mm_unpacklo_epi64(vhi32_1, vhi32_5);
+    __m128i vrowb = _mm_unpackhi_epi64(vhi32_1, vhi32_5);
+    __m128i vrowc = _mm_unpacklo_epi64(vlo32_3, vlo32_7);
+    __m128i vrowd = _mm_unpackhi_epi64(vlo32_3, vlo32_7);
+    __m128i vrowe = _mm_unpacklo_epi64(vhi32_3, vhi32_7);
+    __m128i vrowf = _mm_unpackhi_epi64(vhi32_3, vhi32_7);
+
+    _mm_store_si128((__m128i*)(dst + 0),   vrow0);
+    _mm_store_si128((__m128i*)(dst + 16),  vrow1);
+    _mm_store_si128((__m128i*)(dst + 32),  vrow2);
+    _mm_store_si128((__m128i*)(dst + 48),  vrow3);
+    _mm_store_si128((__m128i*)(dst + 64),  vrow4);
+    _mm_store_si128((__m128i*)(dst + 80),  vrow5);
+    _mm_store_si128((__m128i*)(dst + 96),  vrow6);
+    _mm_store_si128((__m128i*)(dst + 112), vrow7);
+
+    _mm_store_si128((__m128i*)(dst + 128), vrow8);
+    _mm_store_si128((__m128i*)(dst + 144), vrow9);
+    _mm_store_si128((__m128i*)(dst + 160), vrowa);
+    _mm_store_si128((__m128i*)(dst + 176), vrowb);
+    _mm_store_si128((__m128i*)(dst + 192), vrowc);
+    _mm_store_si128((__m128i*)(dst + 208), vrowd);
+    _mm_store_si128((__m128i*)(dst + 224), vrowe);
+    _mm_store_si128((__m128i*)(dst + 240), vrowf);
+
+    dst += 256;
+  }
+}
+
+static void angular_pred_non_fractional_angle_pxl_copy_w32_wide_angle_hor_avx2(uvg_pixel* dst, uvg_pixel* ref, const int height, const int16_t* delta_int)
+{
+  // const int width = 32;
+  // Handle as 32x32 blocks. Similarly to the w16 version, this is also just a transpose of the 32x32 block.
+  // TODO: if this is too slow, consider doing it in 16x16 blocks. There is a lot of data movement between registers in this solution.
+  for (int y = 0; y < height; y += 32) {
+    // Offset indices by one since ref[0] is top left. 
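+    // Same transpose scheme as the w16 version, widened to 256-bit registers.
+    // AVX2 unpacks interleave within each 128-bit lane, so the final
+    // permute2x128 round is what stitches the lanes into whole output rows.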
+ __m256i vref00 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x00])); + __m256i vref01 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x01])); + __m256i vref02 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x02])); + __m256i vref03 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x03])); + __m256i vref04 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x04])); + __m256i vref05 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x05])); + __m256i vref06 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x06])); + __m256i vref07 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x07])); + + __m256i vref08 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x08])); + __m256i vref09 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x09])); + __m256i vref0a = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x0a])); + __m256i vref0b = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x0b])); + __m256i vref0c = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x0c])); + __m256i vref0d = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x0d])); + __m256i vref0e = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x0e])); + __m256i vref0f = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x0f])); + + __m256i vref10 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x10])); + __m256i vref11 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x11])); + __m256i vref12 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x12])); + __m256i vref13 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x13])); + __m256i vref14 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x14])); + __m256i vref15 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x15])); + __m256i vref16 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x16])); + __m256i vref17 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x17])); + + __m256i vref18 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x18])); + __m256i vref19 = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x19])); + __m256i vref1a = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x1a])); + __m256i vref1b = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x1b])); + __m256i vref1c = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x1c])); + __m256i vref1d = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x1d])); + __m256i vref1e = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x1e])); + __m256i vref1f = _mm256_loadu_si256((__m256i*)(ref + y + 1 + delta_int[0x1f])); + + __m256i vlo8_0 = _mm256_unpacklo_epi8(vref00, vref01); + __m256i vlo8_1 = _mm256_unpacklo_epi8(vref02, vref03); + __m256i vlo8_2 = _mm256_unpacklo_epi8(vref04, vref05); + __m256i vlo8_3 = _mm256_unpacklo_epi8(vref06, vref07); + __m256i vlo8_4 = _mm256_unpacklo_epi8(vref08, vref09); + __m256i vlo8_5 = _mm256_unpacklo_epi8(vref0a, vref0b); + __m256i vlo8_6 = _mm256_unpacklo_epi8(vref0c, vref0d); + __m256i vlo8_7 = _mm256_unpacklo_epi8(vref0e, vref0f); + __m256i vlo8_8 = _mm256_unpacklo_epi8(vref10, vref11); + __m256i vlo8_9 = _mm256_unpacklo_epi8(vref12, vref13); + __m256i vlo8_a = _mm256_unpacklo_epi8(vref14, vref15); + __m256i vlo8_b = _mm256_unpacklo_epi8(vref16, vref17); + __m256i vlo8_c = _mm256_unpacklo_epi8(vref18, vref19); + __m256i vlo8_d = _mm256_unpacklo_epi8(vref1a, vref1b); + __m256i vlo8_e = _mm256_unpacklo_epi8(vref1c, vref1d); + __m256i vlo8_f = _mm256_unpacklo_epi8(vref1e, vref1f); + + 
__m256i vhi8_0 = _mm256_unpackhi_epi8(vref00, vref01); + __m256i vhi8_1 = _mm256_unpackhi_epi8(vref02, vref03); + __m256i vhi8_2 = _mm256_unpackhi_epi8(vref04, vref05); + __m256i vhi8_3 = _mm256_unpackhi_epi8(vref06, vref07); + __m256i vhi8_4 = _mm256_unpackhi_epi8(vref08, vref09); + __m256i vhi8_5 = _mm256_unpackhi_epi8(vref0a, vref0b); + __m256i vhi8_6 = _mm256_unpackhi_epi8(vref0c, vref0d); + __m256i vhi8_7 = _mm256_unpackhi_epi8(vref0e, vref0f); + __m256i vhi8_8 = _mm256_unpackhi_epi8(vref10, vref11); + __m256i vhi8_9 = _mm256_unpackhi_epi8(vref12, vref13); + __m256i vhi8_a = _mm256_unpackhi_epi8(vref14, vref15); + __m256i vhi8_b = _mm256_unpackhi_epi8(vref16, vref17); + __m256i vhi8_c = _mm256_unpackhi_epi8(vref18, vref19); + __m256i vhi8_d = _mm256_unpackhi_epi8(vref1a, vref1b); + __m256i vhi8_e = _mm256_unpackhi_epi8(vref1c, vref1d); + __m256i vhi8_f = _mm256_unpackhi_epi8(vref1e, vref1f); + + __m256i vlo16_0 = _mm256_unpacklo_epi16(vlo8_0, vlo8_1); + __m256i vlo16_1 = _mm256_unpacklo_epi16(vlo8_2, vlo8_3); + __m256i vlo16_2 = _mm256_unpacklo_epi16(vlo8_4, vlo8_5); + __m256i vlo16_3 = _mm256_unpacklo_epi16(vlo8_6, vlo8_7); + __m256i vlo16_4 = _mm256_unpacklo_epi16(vlo8_8, vlo8_9); + __m256i vlo16_5 = _mm256_unpacklo_epi16(vlo8_a, vlo8_b); + __m256i vlo16_6 = _mm256_unpacklo_epi16(vlo8_c, vlo8_d); + __m256i vlo16_7 = _mm256_unpacklo_epi16(vlo8_e, vlo8_f); + __m256i vlo16_8 = _mm256_unpacklo_epi16(vhi8_0, vhi8_1); + __m256i vlo16_9 = _mm256_unpacklo_epi16(vhi8_2, vhi8_3); + __m256i vlo16_a = _mm256_unpacklo_epi16(vhi8_4, vhi8_5); + __m256i vlo16_b = _mm256_unpacklo_epi16(vhi8_6, vhi8_7); + __m256i vlo16_c = _mm256_unpacklo_epi16(vhi8_8, vhi8_9); + __m256i vlo16_d = _mm256_unpacklo_epi16(vhi8_a, vhi8_b); + __m256i vlo16_e = _mm256_unpacklo_epi16(vhi8_c, vhi8_d); + __m256i vlo16_f = _mm256_unpacklo_epi16(vhi8_e, vhi8_f); + + __m256i vhi16_0 = _mm256_unpackhi_epi16(vlo8_0, vlo8_1); + __m256i vhi16_1 = _mm256_unpackhi_epi16(vlo8_2, vlo8_3); + __m256i vhi16_2 = _mm256_unpackhi_epi16(vlo8_4, vlo8_5); + __m256i vhi16_3 = _mm256_unpackhi_epi16(vlo8_6, vlo8_7); + __m256i vhi16_4 = _mm256_unpackhi_epi16(vlo8_8, vlo8_9); + __m256i vhi16_5 = _mm256_unpackhi_epi16(vlo8_a, vlo8_b); + __m256i vhi16_6 = _mm256_unpackhi_epi16(vlo8_c, vlo8_d); + __m256i vhi16_7 = _mm256_unpackhi_epi16(vlo8_e, vlo8_f); + __m256i vhi16_8 = _mm256_unpackhi_epi16(vhi8_0, vhi8_1); + __m256i vhi16_9 = _mm256_unpackhi_epi16(vhi8_2, vhi8_3); + __m256i vhi16_a = _mm256_unpackhi_epi16(vhi8_4, vhi8_5); + __m256i vhi16_b = _mm256_unpackhi_epi16(vhi8_6, vhi8_7); + __m256i vhi16_c = _mm256_unpackhi_epi16(vhi8_8, vhi8_9); + __m256i vhi16_d = _mm256_unpackhi_epi16(vhi8_a, vhi8_b); + __m256i vhi16_e = _mm256_unpackhi_epi16(vhi8_c, vhi8_d); + __m256i vhi16_f = _mm256_unpackhi_epi16(vhi8_e, vhi8_f); + + __m256i vlo32_0 = _mm256_unpacklo_epi32(vlo16_0, vlo16_1); + __m256i vlo32_1 = _mm256_unpacklo_epi32(vlo16_2, vlo16_3); + __m256i vlo32_2 = _mm256_unpacklo_epi32(vlo16_4, vlo16_5); + __m256i vlo32_3 = _mm256_unpacklo_epi32(vlo16_6, vlo16_7); + __m256i vlo32_4 = _mm256_unpacklo_epi32(vhi16_0, vhi16_1); + __m256i vlo32_5 = _mm256_unpacklo_epi32(vhi16_2, vhi16_3); + __m256i vlo32_6 = _mm256_unpacklo_epi32(vhi16_4, vhi16_5); + __m256i vlo32_7 = _mm256_unpacklo_epi32(vhi16_6, vhi16_7); + __m256i vlo32_8 = _mm256_unpacklo_epi32(vlo16_8, vlo16_9); + __m256i vlo32_9 = _mm256_unpacklo_epi32(vlo16_a, vlo16_b); + __m256i vlo32_a = _mm256_unpacklo_epi32(vlo16_c, vlo16_d); + __m256i vlo32_b = _mm256_unpacklo_epi32(vlo16_e, vlo16_f); + __m256i vlo32_c 
= _mm256_unpacklo_epi32(vhi16_8, vhi16_9); + __m256i vlo32_d = _mm256_unpacklo_epi32(vhi16_a, vhi16_b); + __m256i vlo32_e = _mm256_unpacklo_epi32(vhi16_c, vhi16_d); + __m256i vlo32_f = _mm256_unpacklo_epi32(vhi16_e, vhi16_f); + + __m256i vhi32_0 = _mm256_unpackhi_epi32(vlo16_0, vlo16_1); + __m256i vhi32_1 = _mm256_unpackhi_epi32(vlo16_2, vlo16_3); + __m256i vhi32_2 = _mm256_unpackhi_epi32(vlo16_4, vlo16_5); + __m256i vhi32_3 = _mm256_unpackhi_epi32(vlo16_6, vlo16_7); + __m256i vhi32_4 = _mm256_unpackhi_epi32(vhi16_0, vhi16_1); + __m256i vhi32_5 = _mm256_unpackhi_epi32(vhi16_2, vhi16_3); + __m256i vhi32_6 = _mm256_unpackhi_epi32(vhi16_4, vhi16_5); + __m256i vhi32_7 = _mm256_unpackhi_epi32(vhi16_6, vhi16_7); + __m256i vhi32_8 = _mm256_unpackhi_epi32(vlo16_8, vlo16_9); + __m256i vhi32_9 = _mm256_unpackhi_epi32(vlo16_a, vlo16_b); + __m256i vhi32_a = _mm256_unpackhi_epi32(vlo16_c, vlo16_d); + __m256i vhi32_b = _mm256_unpackhi_epi32(vlo16_e, vlo16_f); + __m256i vhi32_c = _mm256_unpackhi_epi32(vhi16_8, vhi16_9); + __m256i vhi32_d = _mm256_unpackhi_epi32(vhi16_a, vhi16_b); + __m256i vhi32_e = _mm256_unpackhi_epi32(vhi16_c, vhi16_d); + __m256i vhi32_f = _mm256_unpackhi_epi32(vhi16_e, vhi16_f); + + __m256i vlo64_0 = _mm256_unpacklo_epi64(vlo32_0, vlo32_1); + __m256i vlo64_1 = _mm256_unpacklo_epi64(vlo32_2, vlo32_3); + __m256i vlo64_2 = _mm256_unpacklo_epi64(vhi32_0, vhi32_1); + __m256i vlo64_3 = _mm256_unpacklo_epi64(vhi32_2, vhi32_3); + __m256i vlo64_4 = _mm256_unpacklo_epi64(vlo32_4, vlo32_5); + __m256i vlo64_5 = _mm256_unpacklo_epi64(vlo32_6, vlo32_7); + __m256i vlo64_6 = _mm256_unpacklo_epi64(vhi32_4, vhi32_5); + __m256i vlo64_7 = _mm256_unpacklo_epi64(vhi32_6, vhi32_7); + __m256i vlo64_8 = _mm256_unpacklo_epi64(vlo32_8, vlo32_9); + __m256i vlo64_9 = _mm256_unpacklo_epi64(vlo32_a, vlo32_b); + __m256i vlo64_a = _mm256_unpacklo_epi64(vhi32_8, vhi32_9); + __m256i vlo64_b = _mm256_unpacklo_epi64(vhi32_a, vhi32_b); + __m256i vlo64_c = _mm256_unpacklo_epi64(vlo32_c, vlo32_d); + __m256i vlo64_d = _mm256_unpacklo_epi64(vlo32_e, vlo32_f); + __m256i vlo64_e = _mm256_unpacklo_epi64(vhi32_c, vhi32_d); + __m256i vlo64_f = _mm256_unpacklo_epi64(vhi32_e, vhi32_f); + + __m256i vhi64_0 = _mm256_unpackhi_epi64(vlo32_0, vlo32_1); + __m256i vhi64_1 = _mm256_unpackhi_epi64(vlo32_2, vlo32_3); + __m256i vhi64_2 = _mm256_unpackhi_epi64(vhi32_0, vhi32_1); + __m256i vhi64_3 = _mm256_unpackhi_epi64(vhi32_2, vhi32_3); + __m256i vhi64_4 = _mm256_unpackhi_epi64(vlo32_4, vlo32_5); + __m256i vhi64_5 = _mm256_unpackhi_epi64(vlo32_6, vlo32_7); + __m256i vhi64_6 = _mm256_unpackhi_epi64(vhi32_4, vhi32_5); + __m256i vhi64_7 = _mm256_unpackhi_epi64(vhi32_6, vhi32_7); + __m256i vhi64_8 = _mm256_unpackhi_epi64(vlo32_8, vlo32_9); + __m256i vhi64_9 = _mm256_unpackhi_epi64(vlo32_a, vlo32_b); + __m256i vhi64_a = _mm256_unpackhi_epi64(vhi32_8, vhi32_9); + __m256i vhi64_b = _mm256_unpackhi_epi64(vhi32_a, vhi32_b); + __m256i vhi64_c = _mm256_unpackhi_epi64(vlo32_c, vlo32_d); + __m256i vhi64_d = _mm256_unpackhi_epi64(vlo32_e, vlo32_f); + __m256i vhi64_e = _mm256_unpackhi_epi64(vhi32_c, vhi32_d); + __m256i vhi64_f = _mm256_unpackhi_epi64(vhi32_e, vhi32_f); + + __m256i vrow00 = _mm256_permute2x128_si256(vlo64_0, vlo64_1, 0x20); + __m256i vrow01 = _mm256_permute2x128_si256(vhi64_0, vhi64_1, 0x20); + __m256i vrow02 = _mm256_permute2x128_si256(vlo64_2, vlo64_3, 0x20); + __m256i vrow03 = _mm256_permute2x128_si256(vhi64_2, vhi64_3, 0x20); + __m256i vrow04 = _mm256_permute2x128_si256(vlo64_4, vlo64_5, 0x20); + __m256i vrow05 = 
_mm256_permute2x128_si256(vhi64_4, vhi64_5, 0x20); + __m256i vrow06 = _mm256_permute2x128_si256(vlo64_6, vlo64_7, 0x20); + __m256i vrow07 = _mm256_permute2x128_si256(vhi64_6, vhi64_7, 0x20); + + __m256i vrow08 = _mm256_permute2x128_si256(vlo64_8, vlo64_9, 0x20); + __m256i vrow09 = _mm256_permute2x128_si256(vhi64_8, vhi64_9, 0x20); + __m256i vrow0a = _mm256_permute2x128_si256(vlo64_a, vlo64_b, 0x20); + __m256i vrow0b = _mm256_permute2x128_si256(vhi64_a, vhi64_b, 0x20); + __m256i vrow0c = _mm256_permute2x128_si256(vlo64_c, vlo64_d, 0x20); + __m256i vrow0d = _mm256_permute2x128_si256(vhi64_c, vhi64_d, 0x20); + __m256i vrow0e = _mm256_permute2x128_si256(vlo64_e, vlo64_f, 0x20); + __m256i vrow0f = _mm256_permute2x128_si256(vhi64_e, vhi64_f, 0x20); + + __m256i vrow10 = _mm256_permute2x128_si256(vlo64_0, vlo64_1, 0x31); + __m256i vrow11 = _mm256_permute2x128_si256(vhi64_0, vhi64_1, 0x31); + __m256i vrow12 = _mm256_permute2x128_si256(vlo64_2, vlo64_3, 0x31); + __m256i vrow13 = _mm256_permute2x128_si256(vhi64_2, vhi64_3, 0x31); + __m256i vrow14 = _mm256_permute2x128_si256(vlo64_4, vlo64_5, 0x31); + __m256i vrow15 = _mm256_permute2x128_si256(vhi64_4, vhi64_5, 0x31); + __m256i vrow16 = _mm256_permute2x128_si256(vlo64_6, vlo64_7, 0x31); + __m256i vrow17 = _mm256_permute2x128_si256(vhi64_6, vhi64_7, 0x31); + + __m256i vrow18 = _mm256_permute2x128_si256(vlo64_8, vlo64_9, 0x31); + __m256i vrow19 = _mm256_permute2x128_si256(vhi64_8, vhi64_9, 0x31); + __m256i vrow1a = _mm256_permute2x128_si256(vlo64_a, vlo64_b, 0x31); + __m256i vrow1b = _mm256_permute2x128_si256(vhi64_a, vhi64_b, 0x31); + __m256i vrow1c = _mm256_permute2x128_si256(vlo64_c, vlo64_d, 0x31); + __m256i vrow1d = _mm256_permute2x128_si256(vhi64_c, vhi64_d, 0x31); + __m256i vrow1e = _mm256_permute2x128_si256(vlo64_e, vlo64_f, 0x31); + __m256i vrow1f = _mm256_permute2x128_si256(vhi64_e, vhi64_f, 0x31); + + _mm256_store_si256((__m256i*)(dst + 0), vrow00); + _mm256_store_si256((__m256i*)(dst + 32), vrow01); + _mm256_store_si256((__m256i*)(dst + 64), vrow02); + _mm256_store_si256((__m256i*)(dst + 96), vrow03); + _mm256_store_si256((__m256i*)(dst + 128), vrow04); + _mm256_store_si256((__m256i*)(dst + 160), vrow05); + _mm256_store_si256((__m256i*)(dst + 192), vrow06); + _mm256_store_si256((__m256i*)(dst + 224), vrow07); + _mm256_store_si256((__m256i*)(dst + 256), vrow08); + _mm256_store_si256((__m256i*)(dst + 288), vrow09); + _mm256_store_si256((__m256i*)(dst + 320), vrow0a); + _mm256_store_si256((__m256i*)(dst + 352), vrow0b); + _mm256_store_si256((__m256i*)(dst + 384), vrow0c); + _mm256_store_si256((__m256i*)(dst + 416), vrow0d); + _mm256_store_si256((__m256i*)(dst + 448), vrow0e); + _mm256_store_si256((__m256i*)(dst + 480), vrow0f); + _mm256_store_si256((__m256i*)(dst + 512), vrow10); + _mm256_store_si256((__m256i*)(dst + 544), vrow11); + _mm256_store_si256((__m256i*)(dst + 576), vrow12); + _mm256_store_si256((__m256i*)(dst + 608), vrow13); + _mm256_store_si256((__m256i*)(dst + 640), vrow14); + _mm256_store_si256((__m256i*)(dst + 672), vrow15); + _mm256_store_si256((__m256i*)(dst + 704), vrow16); + _mm256_store_si256((__m256i*)(dst + 736), vrow17); + _mm256_store_si256((__m256i*)(dst + 768), vrow18); + _mm256_store_si256((__m256i*)(dst + 800), vrow19); + _mm256_store_si256((__m256i*)(dst + 832), vrow1a); + _mm256_store_si256((__m256i*)(dst + 864), vrow1b); + _mm256_store_si256((__m256i*)(dst + 896), vrow1c); + _mm256_store_si256((__m256i*)(dst + 928), vrow1d); + _mm256_store_si256((__m256i*)(dst + 960), vrow1e); + _mm256_store_si256((__m256i*)(dst + 
992), vrow1f);
+
+    dst += 1024;
+  }
+}
+
+
+
+static void angular_pdpc_ver_w8_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int height, const int scale, const int mode_disp)
+{
+  const int width = 8;
+
+  int limit = MIN(3 << scale, width);
+
+  __m128i vseq = _mm_setr_epi32(0x00, 0x00, 0x01, 0x00);
+  __m256i v32s = _mm256_set1_epi16(32);
+
+  const int offset = scale * 16;
+  const __m256i vweight = _mm256_load_si256((const __m256i*)&intra_pdpc_w8_ver_weight[offset]);
+
+  const int inv_angle_offset = mode_disp * 64;
+  const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+
+  // For width 8, height must be at least 2. Handle 2 lines at once.
+  for (int y = 0; y < height; y += 2) {
+    ALIGNED(32) int16_t left[16] = {0};
+    for (int xx = 0; xx < limit; ++xx) {
+      for (int yy = 0; yy < 2; ++yy) {
+        left[yy * width + xx] = ref_side[(y + yy) + shifted_inv_angle_sum[xx] + 1];
+      }
+    }
+
+    __m128i vdst = _mm_i64gather_epi64((const long long int*)(dst + y * width), vseq, 8);
+    __m256i vdst16 = _mm256_cvtepu8_epi16(vdst);
+    __m256i vleft = _mm256_loadu_si256((__m256i*)left);
+
+    __m256i accu = _mm256_sub_epi16(vleft, vdst16);
+    accu = _mm256_mullo_epi16(vweight, accu);
+    accu = _mm256_add_epi16(accu, v32s);
+    accu = _mm256_srai_epi16(accu, 6);
+    accu = _mm256_add_epi16(vdst16, accu);
+
+    __m128i lo = _mm256_castsi256_si128(accu);
+    __m128i hi = _mm256_extracti128_si256(accu, 1);
+    __m128i filtered = _mm_packus_epi16(lo, hi);
+
+    _mm_store_si128((__m128i*)(dst + (y * width)), filtered);
+  }
+}
+
+
+static void angular_pdpc_ver_w16_scale0_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp)
+{
+  // NOTE: This function is just the w4 function, retrofitted to work with width 16 and up when scale is 0.
+  // Since scale is 0, the limit is 3 and there is no meaningful work to be done when x > 3, so only the first column of 4x4 chunks is handled.
+  // NOTE: This function also works with width 8 when scale is 0; the name w16 might be a bit misleading.
+  const int scale = 0;
+  int16_t left[4][4];
+  const int log2_width = uvg_g_convert_to_log2[width];
+
+  __m128i vseq = _mm_setr_epi32(0, 1, 2, 3);
+  __m128i vidx = _mm_slli_epi32(vseq, log2_width);
+  __m256i v32s = _mm256_set1_epi16(32);
+
+  // Scale is always 0 here.
+  const int offset = scale * 16;
+  const __m256i vweight = _mm256_load_si256((const __m256i*)&intra_pdpc_w4_ver_weight[offset]);
+
+  const int inv_angle_offset = mode_disp * 64;
+  int16_t shifted_inv_angle_sum[64];
+  memcpy(shifted_inv_angle_sum, &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset], height * sizeof(int16_t)); // TODO: would this be faster if the maximum amount (64) were always loaded?
+
+  // Height must be at least 4. Handle 4 lines at once.
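+  // Scalar equivalent of the filtering below (illustrative):
+  //   dst[y][x] += (wL[x] * (left - dst[y][x]) + 32) >> 6,
+  // where left = ref_side[y + shifted_inv_angle_sum[x] + 1].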
+ for (int y = 0; y < height; y += 4) { + for (int xx = 0; xx < 4; ++xx) { + for (int yy = 0; yy < 4; ++yy) { + left[yy][xx] = ref_side[(y + yy) + shifted_inv_angle_sum[xx] + 1]; + } + } + + __m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width), vidx, 1); + __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); + __m256i vleft = _mm256_loadu_si256((__m256i*)left); + + __m256i accu = _mm256_sub_epi16(vleft, vdst16); + accu = _mm256_mullo_epi16(vweight, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vdst16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + *(uint32_t*)(dst + (y + 0) * width) = _mm_extract_epi32(filtered, 0); + *(uint32_t*)(dst + (y + 1) * width) = _mm_extract_epi32(filtered, 1); + *(uint32_t*)(dst + (y + 2) * width) = _mm_extract_epi32(filtered, 2); + *(uint32_t*)(dst + (y + 3) * width) = _mm_extract_epi32(filtered, 3); + } +} + + +// Mode 18 + +static void angular_pdpc_mode18_w4_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 4; + const int limit = MIN(3 << scale, height); + + __m256i v32s = _mm256_set1_epi16(32); + + const uint32_t ref4 = *(uint32_t*)&ref_side[1]; + + __m128i vref = _mm_set1_epi32(ref4); + __m256i vref16 = _mm256_cvtepu8_epi16(vref); + + __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + // Weight table offset + const int table_offset = scale * 64; + + for (int y = 0, o = 0; y < limit; y += 4, o += 16) { + const int offset = table_offset + o; + + __m128i vpred = _mm_load_si128((__m128i*)(dst + y * width)); + + __m256i vpred16 = _mm256_cvtepu8_epi16(vpred); + __m256i vweight = _mm256_load_si256((const __m256i*) & intra_pdpc_w4_hor_weight[offset]); + + __m256i accu = _mm256_sub_epi16(vref16, vtopleft); + accu = _mm256_mullo_epi16(vweight, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vpred16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + _mm_storeu_si128((__m128i*)(dst + (y * width)), filtered); + } +} + +static void angular_pdpc_mode18_w8_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 8; + int limit = MIN(3 << scale, height); + + __m256i v32s = _mm256_set1_epi16(32); + + const uint64_t ref8 = *(uint64_t*)&ref_side[1]; + + __m128i vref = _mm_set1_epi64x(ref8); + __m256i vref16 = _mm256_cvtepu8_epi16(vref); + + __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + // Weight table offset + const int table_offset = scale * 128; + + for (int y = 0, o = table_offset; y < limit; y += 2, o += 16) { + const __m256i vwT = _mm256_load_si256((const __m256i*) & intra_pdpc_w8_hor_weight[o]); + + __m128i vpred = _mm_load_si128((__m128i*)(dst + y * width)); + __m256i vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu = _mm256_sub_epi16(vref16, vtopleft); + accu = _mm256_mullo_epi16(vwT, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vpred16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + _mm_storeu_si128((__m128i*)(dst + (y * width)), filtered); + } +} + +static void 
angular_pdpc_mode18_w16_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 16; + int limit = MIN(3 << scale, height); + __m256i v32s = _mm256_set1_epi16(32); + + __m128i vref = _mm_loadu_si128((const __m128i*) & ref_side[1]); + __m256i vref16 = _mm256_cvtepu8_epi16(vref); + + __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + // Handle one line at a time. Skip line if vertical limit reached. + for (int y = 0; y < limit; ++y) { + const int16_t wT = 32 >> (2 * (y + 0) >> scale); + __m256i vwT = _mm256_set1_epi16(wT); + + for (int x = 0; x < width; x += 16) { + __m128i vpred = _mm_load_si128((__m128i*)(dst + (y * width + x))); + __m256i vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu = _mm256_sub_epi16(vref16, vtopleft); + accu = _mm256_mullo_epi16(vwT, accu); + accu = _mm256_add_epi16(accu, v32s); accu = _mm256_srai_epi16(accu, 6); - accu = _mm256_add_epi16(vdst16, accu); + accu = _mm256_add_epi16(vpred16, accu); __m128i lo = _mm256_castsi256_si128(accu); __m128i hi = _mm256_extracti128_si256(accu, 1); - vdst = _mm_packus_epi16(lo, hi); + __m128i filtered = _mm_packus_epi16(lo, hi); - *(uint32_t*)(dst + (y + 0) * width + x) = _mm_extract_epi32(vdst, 0); - *(uint32_t*)(dst + (y + 1) * width + x) = _mm_extract_epi32(vdst, 1); - *(uint32_t*)(dst + (y + 2) * width + x) = _mm_extract_epi32(vdst, 2); - *(uint32_t*)(dst + (y + 3) * width + x) = _mm_extract_epi32(vdst, 3); + _mm_storeu_si128((__m128i*)(dst + (y * width + x)), filtered); } } } -#endif //UVG_BIT_DEPTH == 8 -#endif //COMPILE_INTEL_AVX2 && defined X86_64 +static void angular_pdpc_mode18_w32_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 32; + int limit = MIN(3 << scale, height); + __m256i v32s = _mm256_set1_epi16(32); + + __m128i vrefa = _mm_loadu_si128((const __m128i*) & ref_side[1]); + __m256i vref16a = _mm256_cvtepu8_epi16(vrefa); + + __m128i vrefb = _mm_loadu_si128((const __m128i*) & ref_side[17]); + __m256i vref16b = _mm256_cvtepu8_epi16(vrefb); + + __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + // Handle one line at a time. Skip line if vertical limit reached. 
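+  // The top-reference weight decays with the row index: wT = 32 >> ((2 * y) >> scale).
+  // Rows at or past the limit (3 << scale) would get wT == 0, hence the early cutoff.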
+ for (int y = 0; y < limit; ++y) { + const int16_t wT = 32 >> (2 * (y + 0) >> scale); + __m256i vwT = _mm256_set1_epi16(wT); + + // Calculate first half + __m128i vpred = _mm_load_si128((__m128i*)(dst + (y * width + 0))); + __m256i vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu0 = _mm256_sub_epi16(vref16a, vtopleft); + accu0 = _mm256_mullo_epi16(vwT, accu0); + accu0 = _mm256_add_epi16(accu0, v32s); + accu0 = _mm256_srai_epi16(accu0, 6); + accu0 = _mm256_add_epi16(vpred16, accu0); + + // Calculate second half + vpred = _mm_load_si128((__m128i*)(dst + (y * width + 16))); + vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu1 = _mm256_sub_epi16(vref16b, vtopleft); + accu1 = _mm256_mullo_epi16(vwT, accu1); + accu1 = _mm256_add_epi16(accu1, v32s); + accu1 = _mm256_srai_epi16(accu1, 6); + accu1 = _mm256_add_epi16(vpred16, accu1); + + // Store results + __m256i packed = _mm256_packus_epi16(accu0, accu1); + packed = _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm256_store_si256((__m256i*)(dst + (y * width)), packed); + } +} + +static void angular_pdpc_mode18_w64_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 64; + int limit = MIN(3 << scale, height); + __m256i v32s = _mm256_set1_epi16(32); + + __m128i vrefa = _mm_loadu_si128((const __m128i*) &ref_side[0 + 1]); + __m256i vref16a = _mm256_cvtepu8_epi16(vrefa); + + __m128i vrefb = _mm_loadu_si128((const __m128i*) &ref_side[16 + 1]); + __m256i vref16b = _mm256_cvtepu8_epi16(vrefb); + + __m128i vrefc = _mm_loadu_si128((const __m128i*) &ref_side[32 + 1]); + __m256i vref16c = _mm256_cvtepu8_epi16(vrefc); + + __m128i vrefd = _mm_loadu_si128((const __m128i*) &ref_side[48 + 1]); + __m256i vref16d = _mm256_cvtepu8_epi16(vrefd); + + __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + // Handle one line at a time. Skip line if vertical limit reached. 
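+  // Same per-row weight as the narrower variants; each 64-wide row is processed
+  // in four 16-pixel quarters, since cvtepu8_epi16 widens 16 bytes to a full
+  // 256-bit register.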
+ for (int y = 0; y < limit; ++y) { + const int16_t wT = 32 >> (2 * (y + 0) >> scale); + __m256i vwT = _mm256_set1_epi16(wT); + + // Calculate first quarter + __m128i vpred = _mm_load_si128((__m128i*)(dst + (y * width + 0))); + __m256i vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu0 = _mm256_sub_epi16(vref16a, vtopleft); + accu0 = _mm256_mullo_epi16(vwT, accu0); + accu0 = _mm256_add_epi16(accu0, v32s); + accu0 = _mm256_srai_epi16(accu0, 6); + accu0 = _mm256_add_epi16(vpred16, accu0); + + // Calculate second quarter + vpred = _mm_load_si128((__m128i*)(dst + (y * width + 16))); + vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu1 = _mm256_sub_epi16(vref16b, vtopleft); + accu1 = _mm256_mullo_epi16(vwT, accu1); + accu1 = _mm256_add_epi16(accu1, v32s); + accu1 = _mm256_srai_epi16(accu1, 6); + accu1 = _mm256_add_epi16(vpred16, accu1); + + // Calculate third quarter + vpred = _mm_load_si128((__m128i*)(dst + (y * width + 32))); + vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu2 = _mm256_sub_epi16(vref16c, vtopleft); + accu2 = _mm256_mullo_epi16(vwT, accu2); + accu2 = _mm256_add_epi16(accu2, v32s); + accu2 = _mm256_srai_epi16(accu2, 6); + accu2 = _mm256_add_epi16(vpred16, accu2); + + // Calculate fourth quarter + vpred = _mm_load_si128((__m128i*)(dst + (y * width + 48))); + vpred16 = _mm256_cvtepu8_epi16(vpred); + + __m256i accu3 = _mm256_sub_epi16(vref16d, vtopleft); + accu3 = _mm256_mullo_epi16(vwT, accu3); + accu3 = _mm256_add_epi16(accu3, v32s); + accu3 = _mm256_srai_epi16(accu3, 6); + accu3 = _mm256_add_epi16(vpred16, accu3); + + __m256i packed0 = _mm256_packus_epi16(accu0, accu1); + __m256i packed1 = _mm256_packus_epi16(accu2, accu3); + packed0 = _mm256_permute4x64_epi64(packed0, _MM_SHUFFLE(3, 1, 2, 0)); + packed1 = _mm256_permute4x64_epi64(packed1, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm256_store_si256((__m256i*)(dst + (y * width + 0)), packed0); + _mm256_store_si256((__m256i*)(dst + (y * width + 32)), packed1); + } +} + + +// Vertical modes + +static void angular_pdpc_ver_w4_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int height, const int scale, const int mode_disp) +{ + const int width = 4; + //ALIGNED(32) uint8_t left[4][4]; + __m128i v32s = _mm_set1_epi16(32); + + // Scale can be 0, 1 or 2 + const int offset = scale * 16; + const __m128i vweight = _mm_load_si128((const __m128i*) &intra_pdpc_w4_ver_improved_weight[offset]); + + const int inv_angle_offset = mode_disp * 64; + const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset]; + + const __m128i vleftshuf = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + __m128i vidx = _mm_setr_epi32(shifted_inv_angle_sum[0], shifted_inv_angle_sum[1], + shifted_inv_angle_sum[2], shifted_inv_angle_sum[3]); + + // For a 4 width block, height must be at least 4. Handle 4 lines at once. 
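+  // The improved weight table interleaves byte pairs that sum to 64 (in effect
+  // (64 - wL, wL)), so one maddubs over interleaved (pred, left) bytes computes
+  // the whole blend; adding 32 and shifting right by 6 rounds back to pixels.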
+ for (int y = 0; y < height; y += 4) { + /*for (int xx = 0; xx < width; ++xx) { + memcpy(left[xx], &ref_side[(y + 0) + shifted_inv_angle_sum[xx] + 1], 4 * sizeof(uint8_t)); + }*/ + + __m128i vdst = _mm_loadu_si128((const __m128i*)(dst + y * width)); + //__m128i vleft = _mm_load_si128((__m128i*)left); + __m128i vleft = _mm_i32gather_epi32((const int32_t*)&ref_side[y + 1], vidx, 1); + vleft = _mm_shuffle_epi8(vleft, vleftshuf); + + __m128i vlo = _mm_unpacklo_epi8(vdst, vleft); + __m128i vhi = _mm_unpackhi_epi8(vdst, vleft); + + __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweight); + __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweight); + + vmaddlo = _mm_add_epi16(vmaddlo, v32s); + vmaddhi = _mm_add_epi16(vmaddhi, v32s); + + vmaddlo = _mm_srai_epi16(vmaddlo, 6); + vmaddhi = _mm_srai_epi16(vmaddhi, 6); + + __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi); + + _mm_store_si128((__m128i*)(dst + (y * width)), packed); + } +} + +static void angular_pdpc_ver_w4_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int height, const int scale, const int mode_disp) +{ + const int width = 4; + __m128i v32s = _mm_set1_epi16(32); + + // Scale can be 0, 1 or 2 + const int offset = scale * 16; + const int inv_angle_offset = mode_disp * 64; + const int shuf_offset = mode_disp * 16; + + const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset]; + const __m128i vweight = _mm_load_si128((const __m128i*) &intra_pdpc_w4_ver_improved_weight[offset]); + const __m128i vshuf = _mm_loadu_si128((__m128i*) &intra_pdpc_shuffle_vectors_w4_ver[shuf_offset]); + + // For a 4 width block, height must be at least 4. Handle 4 lines at once. + for (int y = 0; y < height; y += 4) { + __m128i vleft = _mm_loadu_si128((__m128i*) &ref_side[y + shifted_inv_angle_sum[0] + 1]); + vleft = _mm_shuffle_epi8(vleft, vshuf); + + //__m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width), vseq, 4); + __m128i vdst = _mm_loadu_si128((const __m128i*)(dst + y * width)); + + __m128i vlo = _mm_unpacklo_epi8(vdst, vleft); + __m128i vhi = _mm_unpackhi_epi8(vdst, vleft); + + __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweight); + __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweight); + + vmaddlo = _mm_add_epi16(vmaddlo, v32s); + vmaddhi = _mm_add_epi16(vmaddhi, v32s); + + vmaddlo = _mm_srai_epi16(vmaddlo, 6); + vmaddhi = _mm_srai_epi16(vmaddhi, 6); + + __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi); + + _mm_store_si128((__m128i*)(dst + (y * width)), packed); + } +} + + +static void angular_pdpc_ver_4x4_scale0_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp) +{ + // This function is just the w4 function, retrofitted to work with any width when scale is 0. If width is 4, use a specialized function instead. + // Since scale is 0, limit is 3 and therefore there is no meaningful work to be done when x > 3, so only the first column of 4x4 chunks is handled. 
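+ // With scale 0 the PDPC limit is MIN(3 << 0, width) = 3: the weight
+ // wL = 32 >> (2 * x) is zero from x = 3 onward, so one 4-wide column of
+ // chunks covers the whole filtered region regardless of block width.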
+ const int scale = 0;
+ const int log2_width = uvg_g_convert_to_log2[width];
+ __m128i v32s = _mm_set1_epi16(32);
+
+ // Scale can be 0, 1 or 2
+ const int offset = scale * 16;
+ const __m128i vweight = _mm_load_si128((const __m128i*) &intra_pdpc_w4_ver_improved_weight[offset]);
+
+ const int inv_angle_offset = mode_disp * 64;
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+
+ __m128i vseq = _mm_setr_epi32(0, 1, 2, 3);
+ __m128i vidx = _mm_slli_epi32(vseq, log2_width);
+
+ __m128i vidx_left = _mm_setr_epi32(shifted_inv_angle_sum[0], shifted_inv_angle_sum[1],
+ shifted_inv_angle_sum[2], shifted_inv_angle_sum[3]);
+
+ const __m128i vleftshuf = _mm_setr_epi8(
+ 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d,
+ 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f
+ );
+
+ // For a 4 width block, height must be at least 4. Handle 4 lines at once.
+ for (int y = 0; y < height; y += 4) {
+ __m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width), vidx, 1);
+ __m128i vleft = _mm_i32gather_epi32((const int32_t*)&ref_side[y + 1], vidx_left, 1);
+ vleft = _mm_shuffle_epi8(vleft, vleftshuf);
+
+ __m128i vlo = _mm_unpacklo_epi8(vdst, vleft);
+ __m128i vhi = _mm_unpackhi_epi8(vdst, vleft);
+
+ __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweight);
+ __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweight);
+
+ vmaddlo = _mm_add_epi16(vmaddlo, v32s);
+ vmaddhi = _mm_add_epi16(vmaddhi, v32s);
+
+ vmaddlo = _mm_srai_epi16(vmaddlo, 6);
+ vmaddhi = _mm_srai_epi16(vmaddhi, 6);
+
+ __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi);
+
+ *(uint32_t*)(dst + (y + 0) * width) = _mm_extract_epi32(packed, 0);
+ *(uint32_t*)(dst + (y + 1) * width) = _mm_extract_epi32(packed, 1);
+ *(uint32_t*)(dst + (y + 2) * width) = _mm_extract_epi32(packed, 2);
+ *(uint32_t*)(dst + (y + 3) * width) = _mm_extract_epi32(packed, 3);
+ }
+}
+
+static void angular_pdpc_ver_4x4_scale0_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp)
+{
+ // This function is just the w4 function, retrofitted to work with any width when scale is 0. If width is 4, use a specialized function instead.
+ // Since scale is 0, limit is 3 and therefore there is no meaningful work to be done when x > 3, so only the first column of 4x4 chunks is handled.
+ // This function handles cases where the prediction angle is low. For PDPC, this means the needed reference samples are close together, enabling more effective loading.
+ const int scale = 0;
+ const int log2_width = uvg_g_convert_to_log2[width];
+
+ const int limit = 3;
+
+ __m128i vseq = _mm_setr_epi32(0, 1, 2, 3);
+ __m128i vidx = _mm_slli_epi32(vseq, log2_width);
+ __m128i v32s = _mm_set1_epi16(32);
+
+ // Scale can be 0, 1 or 2
+ const int offset = scale * 16;
+ const int inv_angle_offset = mode_disp * 64;
+ const int shuf_offset = mode_disp * 16;
+
+ const __m128i vweight = _mm_load_si128((const __m128i*) &intra_pdpc_w4_ver_improved_weight[offset]);
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+ const __m128i vshuf = _mm_loadu_si128((__m128i*) &intra_pdpc_shuffle_vectors_w4_ver[shuf_offset]);
+
+ // For a 4 width block, height must be at least 4. Handle 4 lines at once.
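+ // Unlike the high-angle variant above, a single unaligned 16-byte load is
+ // enough here: for these angles the four per-column inverse-angle offsets
+ // fall inside one 16-byte window, and the precomputed shuffle vector
+ // (indexed by mode_disp) both picks and transposes the needed samples.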
+ for (int y = 0; y < height; y += 4) {
+ __m128i vleft = _mm_loadu_si128((__m128i*) &ref_side[y + shifted_inv_angle_sum[0] + 1]);
+ vleft = _mm_shuffle_epi8(vleft, vshuf);
+ __m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width), vidx, 1);
+
+ __m128i vlo = _mm_unpacklo_epi8(vdst, vleft);
+ __m128i vhi = _mm_unpackhi_epi8(vdst, vleft);
+
+ __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweight);
+ __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweight);
+
+ vmaddlo = _mm_add_epi16(vmaddlo, v32s);
+ vmaddhi = _mm_add_epi16(vmaddhi, v32s);
+
+ vmaddlo = _mm_srai_epi16(vmaddlo, 6);
+ vmaddhi = _mm_srai_epi16(vmaddhi, 6);
+
+ __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi);
+
+ *(uint32_t*)(dst + (y + 0) * width) = _mm_extract_epi32(packed, 0);
+ *(uint32_t*)(dst + (y + 1) * width) = _mm_extract_epi32(packed, 1);
+ *(uint32_t*)(dst + (y + 2) * width) = _mm_extract_epi32(packed, 2);
+ *(uint32_t*)(dst + (y + 3) * width) = _mm_extract_epi32(packed, 3);
+ }
+}
+
+
+static void angular_pdpc_ver_8x4_scale1_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp)
+{
+ // NOTE: This function is just the w8 function, retrofitted to work with width 16 and up when scale is 1.
+ // Since scale is 1, limit is 6 and therefore there is no meaningful work to be done when x >= 6, so only the first column of 8x4 chunks is handled.
+ const int scale = 1;
+ const int log2_width = uvg_g_convert_to_log2[width];
+
+ const int limit = 6;
+
+ __m256i vseq = _mm256_setr_epi64x(0, 1, 2, 3);
+ __m256i vidx = _mm256_slli_epi32(vseq, log2_width);
+ __m256i v32s = _mm256_set1_epi16(32);
+
+ const int offset = scale * 32;
+ const int inv_angle_offset = mode_disp * 64;
+ const int shuf_offset = mode_disp * 16;
+
+ const __m256i vweight = _mm256_load_si256((const __m256i*) &intra_pdpc_w8_ver_improved_weight[offset]);
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+ //const __m128i vshuf = _mm_loadu_si128((__m128i*) &intra_pdpc_shuffle_vectors_8x2_scale1_ver[shuf_offset]);
+
+ __m256i vidxleft = _mm256_setr_epi32(shifted_inv_angle_sum[0], shifted_inv_angle_sum[1],
+ shifted_inv_angle_sum[2], shifted_inv_angle_sum[3],
+ shifted_inv_angle_sum[4], shifted_inv_angle_sum[5],
+ shifted_inv_angle_sum[6], shifted_inv_angle_sum[7]); // Indices 6 and 7 are not needed, since limit is 6.
+
+ const __m256i vtranspose0 = _mm256_setr_epi8(
+ 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d,
+ 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f,
+ 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d,
+ 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f
+ );
+
+ const __m256i vtranspose1 = _mm256_setr_epi8(
+ 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
+ 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
+ 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f
+ );
+
+ // For width 8, height must be at least 4 as PDPC is not done when height < 4. Handle 4 lines at once; this enables us to use gather for ref pixels.
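+ // The gather below returns the left-reference samples column-major (one
+ // 32-bit lane = four rows of one column). The two byte shuffles with a
+ // 64-bit permute in between then perform an 8x4 byte transpose so the
+ // samples match the row-major layout of vdst.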
+ for (int y = 0; y < height; y += 4) {
+ __m256i vdst = _mm256_i64gather_epi64((const long long int*)(dst + y * width), vidx, 1);
+ __m256i vleft = _mm256_i32gather_epi32((const int32_t*)&ref_side[y + 1], vidxleft, 1);
+
+ // Transpose vleft
+ vleft = _mm256_shuffle_epi8(vleft, vtranspose0);
+ vleft = _mm256_permute4x64_epi64(vleft, _MM_SHUFFLE(3, 1, 2, 0));
+ vleft = _mm256_shuffle_epi8(vleft, vtranspose1);
+
+ __m256i vlo = _mm256_unpacklo_epi8(vdst, vleft);
+ __m256i vhi = _mm256_unpackhi_epi8(vdst, vleft);
+
+ __m256i vmaddlo = _mm256_maddubs_epi16(vlo, vweight);
+ __m256i vmaddhi = _mm256_maddubs_epi16(vhi, vweight);
+
+ vmaddlo = _mm256_add_epi16(vmaddlo, v32s);
+ vmaddhi = _mm256_add_epi16(vmaddhi, v32s);
+
+ vmaddlo = _mm256_srai_epi16(vmaddlo, 6);
+ vmaddhi = _mm256_srai_epi16(vmaddhi, 6);
+
+ __m256i packed = _mm256_packus_epi16(vmaddlo, vmaddhi);
+
+ // TODO: if this if branch is deemed to cause slow down, make another version of this, where this check is not needed.
+ // If this does not slow down significantly, make this same check in other functions to reduce the function call switch case complexity
+ if (width == 8) {
+ _mm256_store_si256((__m256i*)(dst + (y * width)), packed);
+ }
+ else {
+ *(uint64_t*)(dst + (y + 0) * width) = _mm256_extract_epi64(packed, 0);
+ *(uint64_t*)(dst + (y + 1) * width) = _mm256_extract_epi64(packed, 1);
+ *(uint64_t*)(dst + (y + 2) * width) = _mm256_extract_epi64(packed, 2);
+ *(uint64_t*)(dst + (y + 3) * width) = _mm256_extract_epi64(packed, 3);
+ }
+ }
+}
+
+static void angular_pdpc_ver_8x4_scale1_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp)
+{
+ // NOTE: This function is just the w8 function, retrofitted to work with width 16 and up when scale is 1.
+ // Since scale is 1, limit is 6 and therefore there is no meaningful work to be done when x >= 6, so only the first column of 8x4 chunks is handled.
+ // This function handles cases where the prediction angle is low. For PDPC, this means the needed reference samples are close together, enabling more effective loading.
+ const int scale = 1;
+ const int log2_width = uvg_g_convert_to_log2[width];
+
+ const int limit = 6;
+
+ __m256i vseq = _mm256_setr_epi64x(0, 1, 2, 3);
+ __m256i vidx = _mm256_slli_epi32(vseq, log2_width);
+ __m256i v32s = _mm256_set1_epi16(32);
+
+ const int offset = scale * 32;
+ const int inv_angle_offset = mode_disp * 64;
+ const int shuf_offset = mode_disp * 16;
+
+ const __m256i vweight = _mm256_load_si256((const __m256i*) &intra_pdpc_w8_ver_improved_weight[offset]);
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+ const __m128i vshuf = _mm_loadu_si128((__m128i*) &intra_pdpc_shuffle_vectors_8x2_scale1_ver[shuf_offset]);
+
+ // For width 8, height must be at least 4 as PDPC is not done when height < 4. Handle 4 lines at once.
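+ // Here two 16-byte loads at rows y and y + 2 cover all needed reference
+ // samples; the shuffle vector expands them into four rows of eight
+ // left-reference pixels, avoiding the gathers of the high-angle variant.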
+ for (int y = 0; y < height; y += 4) { + __m128i vleft0 = _mm_loadu_si128((__m128i*) &ref_side[(y + 0) + shifted_inv_angle_sum[0] + 1]); + __m128i vleft1 = _mm_loadu_si128((__m128i*) &ref_side[(y + 2) + shifted_inv_angle_sum[0] + 1]); + vleft0 = _mm_shuffle_epi8(vleft0, vshuf); + vleft1 = _mm_shuffle_epi8(vleft1, vshuf); + + __m256i vleft = _mm256_inserti128_si256(_mm256_castsi128_si256(vleft0), vleft1, 1); + __m256i vdst = _mm256_i64gather_epi64((const long long int*)(dst + y * width), vidx, 1); + + __m256i vlo = _mm256_unpacklo_epi8(vdst, vleft); + __m256i vhi = _mm256_unpackhi_epi8(vdst, vleft); + + __m256i vmaddlo = _mm256_maddubs_epi16(vlo, vweight); + __m256i vmaddhi = _mm256_maddubs_epi16(vhi, vweight); + + vmaddlo = _mm256_add_epi16(vmaddlo, v32s); + vmaddhi = _mm256_add_epi16(vmaddhi, v32s); + + vmaddlo = _mm256_srai_epi16(vmaddlo, 6); + vmaddhi = _mm256_srai_epi16(vmaddhi, 6); + + __m256i packed = _mm256_packus_epi16(vmaddlo, vmaddhi); + + // TODO: if this if branch is deemed to cause slow down, make another version of this, where this check is not needed. + // If this does not slow down significantly, make this same check in other functions to reduce the function call switch case complexity + if (width == 8) { + _mm256_store_si256((__m256i*)(dst + (y * width)), packed); + } + else { + *(uint64_t*)(dst + (y + 0) * width) = _mm256_extract_epi64(packed, 0); + *(uint64_t*)(dst + (y + 1) * width) = _mm256_extract_epi64(packed, 1); + *(uint64_t*)(dst + (y + 2) * width) = _mm256_extract_epi64(packed, 2); + *(uint64_t*)(dst + (y + 3) * width) = _mm256_extract_epi64(packed, 3); + } + } +} + + +static void angular_pdpc_ver_w16_high_angle_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp) +{ + __m256i v32s = _mm256_set1_epi16(32); + const int scale = 2; // Other functions handle scales 0 and 1 + int limit = 12; // With scale 2, limit is always 12. + + const int offset = scale * 32; + const __m256i vweight = _mm256_load_si256((const __m256i*) &intra_pdpc_w16_ver_improved_weight[offset]); + + const int inv_angle_offset = mode_disp * 64; + const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset]; + + const __m256i vidx0 = _mm256_setr_epi32(shifted_inv_angle_sum[0], shifted_inv_angle_sum[1], + shifted_inv_angle_sum[2], shifted_inv_angle_sum[3], + shifted_inv_angle_sum[4], shifted_inv_angle_sum[5], + shifted_inv_angle_sum[6], shifted_inv_angle_sum[7]); + const __m256i vidx1 = _mm256_setr_epi32(shifted_inv_angle_sum[8], shifted_inv_angle_sum[9], + shifted_inv_angle_sum[10], shifted_inv_angle_sum[11], + shifted_inv_angle_sum[12], shifted_inv_angle_sum[13], // These are not used. + shifted_inv_angle_sum[14], shifted_inv_angle_sum[15]); // These are not used. + + const __m256i transpose = _mm256_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f, + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + // 0xff are 'don't care' values, they will be zeroed out by coefficients + const __m256i vpermute = _mm256_setr_epi32( + 0x00, 0x04, 0x02, 0xff, 0x01, 0x05, 0x03, 0xff + ); + + // Handle 4 rows at once to enable gather for ref pixels. 
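+ // The unpack + maddubs sequence below evaluates the PDPC blend with a
+ // single multiply-add per pixel pair. Scalar sketch (illustrative only):
+ //   const int wL = 32 >> ((2 * x) >> 2);  // scale == 2 here
+ //   dst[y * width + x] = ((64 - wL) * dst[y * width + x]
+ //     + wL * ref_side[y + shifted_inv_angle_sum[x] + 1] + 32) >> 6;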
+ for (int y = 0; y < height; y += 4) { + __m128i vdstraw0 = _mm_load_si128((const __m128i*)(dst + ((y + 0) * width))); + __m128i vdstraw1 = _mm_load_si128((const __m128i*)(dst + ((y + 1) * width))); + __m128i vdstraw2 = _mm_load_si128((const __m128i*)(dst + ((y + 2) * width))); + __m128i vdstraw3 = _mm_load_si128((const __m128i*)(dst + ((y + 3) * width))); + + __m256i vdst0 = _mm256_inserti128_si256(_mm256_castsi128_si256(vdstraw0), vdstraw1, 1); + __m256i vdst1 = _mm256_inserti128_si256(_mm256_castsi128_si256(vdstraw2), vdstraw3, 1); + + __m256i vleft0 = _mm256_i32gather_epi32((const int32_t*)&ref_side[y + 1], vidx0, 1); + __m256i vleft1 = _mm256_i32gather_epi32((const int32_t*)&ref_side[y + 1], vidx1, 1); + vleft0 = _mm256_shuffle_epi8(vleft0, transpose); + vleft1 = _mm256_shuffle_epi8(vleft1, transpose); + + __m256i vtmplo = _mm256_unpacklo_epi64(vleft0, vleft1); + __m256i vtmphi = _mm256_unpackhi_epi64(vleft0, vleft1); + + vleft0 = _mm256_permutevar8x32_epi32(vtmplo, vpermute); + vleft1 = _mm256_permutevar8x32_epi32(vtmphi, vpermute); + + __m256i vlo0 = _mm256_unpacklo_epi8(vdst0, vleft0); + __m256i vhi0 = _mm256_unpackhi_epi8(vdst0, vleft0); + __m256i vlo1 = _mm256_unpacklo_epi8(vdst1, vleft1); + __m256i vhi1 = _mm256_unpackhi_epi8(vdst1, vleft1); + + __m256i v0 = _mm256_permute2x128_si256(vlo0, vhi0, 0x20); + __m256i v1 = _mm256_permute2x128_si256(vlo0, vhi0, 0x31); + __m256i v2 = _mm256_permute2x128_si256(vlo1, vhi1, 0x20); + __m256i v3 = _mm256_permute2x128_si256(vlo1, vhi1, 0x31); + + __m256i vmadd0 = _mm256_maddubs_epi16(v0, vweight); + __m256i vmadd1 = _mm256_maddubs_epi16(v1, vweight); + __m256i vmadd2 = _mm256_maddubs_epi16(v2, vweight); + __m256i vmadd3 = _mm256_maddubs_epi16(v3, vweight); + + vmadd0 = _mm256_add_epi16(vmadd0, v32s); + vmadd1 = _mm256_add_epi16(vmadd1, v32s); + vmadd2 = _mm256_add_epi16(vmadd2, v32s); + vmadd3 = _mm256_add_epi16(vmadd3, v32s); + + vmadd0 = _mm256_srai_epi16(vmadd0, 6); + vmadd1 = _mm256_srai_epi16(vmadd1, 6); + vmadd2 = _mm256_srai_epi16(vmadd2, 6); + vmadd3 = _mm256_srai_epi16(vmadd3, 6); + + __m256i packed0 = _mm256_packus_epi16(vmadd0, vmadd1); + __m256i packed1 = _mm256_packus_epi16(vmadd2, vmadd3); + packed0 = _mm256_permute4x64_epi64(packed0, _MM_SHUFFLE(3, 1, 2, 0)); + packed1 = _mm256_permute4x64_epi64(packed1, _MM_SHUFFLE(3, 1, 2, 0)); + + if (width == 16) { + _mm256_store_si256((__m256i*)(dst + ((y + 0) * width)), packed0); + _mm256_store_si256((__m256i*)(dst + ((y + 2) * width)), packed1); + } + else { + _mm_store_si128((__m128i*)(dst + ((y + 0) * width)), _mm256_extracti128_si256(packed0, 0)); + _mm_store_si128((__m128i*)(dst + ((y + 1) * width)), _mm256_extracti128_si256(packed0, 1)); + _mm_store_si128((__m128i*)(dst + ((y + 2) * width)), _mm256_extracti128_si256(packed1, 0)); + _mm_store_si128((__m128i*)(dst + ((y + 3) * width)), _mm256_extracti128_si256(packed1, 1)); + } + } +} + +static void angular_pdpc_ver_w16_scale2_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int mode_disp) +{ + __m128i v32s = _mm_set1_epi16(32); + const int scale = 2; // Other functions handle scales 0 and 1 + int limit = 12; // With scale 2, limit is always 12. 
+
+ const int offset = scale * 32;
+ const int inv_angle_offset = mode_disp * 64;
+ const int shuf_offset = mode_disp * 16;
+
+ const __m128i vweightlo = _mm_load_si128((const __m128i*) &intra_pdpc_w16_ver_improved_weight[offset + 0]);
+ const __m128i vweighthi = _mm_load_si128((const __m128i*) &intra_pdpc_w16_ver_improved_weight[offset + 16]);
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+ const __m128i vshuf = _mm_load_si128((const __m128i*) &intra_pdpc_shuffle_vectors_w16_scale2_ver[shuf_offset]);
+
+ // Handle one row at a time. With limit 12, a single 16 pixel wide chunk per row covers all filtered columns.
+ for (int y = 0; y < height; ++y) {
+ for (int x = 0; x < limit; x += 16) {
+ __m128i vleft = _mm_loadu_si128((__m128i*) &ref_side[(y + 0) + shifted_inv_angle_sum[0] + 1]);
+ vleft = _mm_shuffle_epi8(vleft, vshuf);
+
+ __m128i vdst = _mm_load_si128((const __m128i*)(dst + ((y + 0) * width + x)));
+
+ __m128i vlo = _mm_unpacklo_epi8(vdst, vleft);
+ __m128i vhi = _mm_unpackhi_epi8(vdst, vleft);
+
+ __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweightlo);
+ __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweighthi);
+
+ vmaddlo = _mm_add_epi16(vmaddlo, v32s);
+ vmaddhi = _mm_add_epi16(vmaddhi, v32s);
+
+ vmaddlo = _mm_srai_epi16(vmaddlo, 6);
+ vmaddhi = _mm_srai_epi16(vmaddhi, 6);
+
+ __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi);
+
+ _mm_store_si128((__m128i*)(dst + (y * width + x)), packed);
+ }
+ }
+}
+
+
+// Horizontal modes
+
+static void angular_pdpc_hor_w4_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int height, const int scale, const int mode_disp)
+{
+ const int width = 4;
+
+ int limit = MIN(3 << scale, height);
+
+ __m128i v32s = _mm_set1_epi16(32);
+
+ // Scale can be 0, 1 or 2
+ const int table_offset = scale * 128;
+ const int shuf_offset = mode_disp * 256;
+ const int inv_angle_offset = mode_disp * 64;
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+
+ for (int y = 0, so = 0, wo = 0; y < limit; y += 4, so += 16, wo += 32) {
+ const __m128i vshuf = _mm_loadu_si128((__m128i*) &intra_pdpc_shuffle_vectors_w4_hor[shuf_offset + so]);
+
+ __m128i vtop = _mm_loadu_si128((__m128i*) &ref_side[shifted_inv_angle_sum[y] + 1]);
+ vtop = _mm_shuffle_epi8(vtop, vshuf);
+
+ const int offset = table_offset + wo;
+
+ __m128i vdst = _mm_load_si128((const __m128i*)(dst + y * width));
+ __m128i vweightlo = _mm_load_si128((const __m128i*) &intra_pdpc_w4_hor_improved_weight[offset + 0]);
+ __m128i vweighthi = _mm_load_si128((const __m128i*) &intra_pdpc_w4_hor_improved_weight[offset + 16]);
+
+ __m128i vlo = _mm_unpacklo_epi8(vdst, vtop);
+ __m128i vhi = _mm_unpackhi_epi8(vdst, vtop);
+
+ __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweightlo);
+ __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweighthi);
+
+ vmaddlo = _mm_add_epi16(vmaddlo, v32s);
+ vmaddhi = _mm_add_epi16(vmaddhi, v32s);
+
+ vmaddlo = _mm_srai_epi16(vmaddlo, 6);
+ vmaddhi = _mm_srai_epi16(vmaddhi, 6);
+
+ __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi);
+
+ _mm_storeu_si128((__m128i*)(dst + (y * width)), packed);
+ }
+}
+
+static void angular_pdpc_hor_w8_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int height, const int scale, const int mode_disp)
+{
+ const int width = 8;
+
+ int limit = MIN(3 << scale, height);
+
+ __m256i v32s = _mm256_set1_epi16(32);
+
+ // Scale can be 0, 1 or 2
+ const int table_offset = scale * 256;
+ const int inv_angle_offset = mode_disp * 64;
+ const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset];
+
+ // Handle 4 lines at once since PDPC is
not done on 8x2 blocks. + for (int y = 0, o = table_offset; y < limit; y += 4, o += 64) { + const __m256i vweight01 = _mm256_load_si256((const __m256i*) &intra_pdpc_w8_hor_improved_weight[o + 0]); + const __m256i vweight23 = _mm256_load_si256((const __m256i*) &intra_pdpc_w8_hor_improved_weight[o + 32]); + + const __m256i vidx = _mm256_set_epi64x(shifted_inv_angle_sum[y + 3], shifted_inv_angle_sum[y + 2], + shifted_inv_angle_sum[y + 1], shifted_inv_angle_sum[y + 0]); + + __m256i vdst = _mm256_load_si256((const __m256i*)(dst + y * width)); + __m256i vtop = _mm256_i64gather_epi64((const long long int*)&ref_side[1], vidx, 1); + + __m256i vlo = _mm256_unpacklo_epi8(vdst, vtop); + __m256i vhi = _mm256_unpackhi_epi8(vdst, vtop); + + __m256i v01 = _mm256_permute2x128_si256(vlo, vhi, 0x20); + __m256i v23 = _mm256_permute2x128_si256(vlo, vhi, 0x31); + + __m256i vmadd01 = _mm256_maddubs_epi16(v01, vweight01); + __m256i vmadd23 = _mm256_maddubs_epi16(v23, vweight23); + + vmadd01 = _mm256_add_epi16(vmadd01, v32s); + vmadd23 = _mm256_add_epi16(vmadd23, v32s); + + vmadd01 = _mm256_srai_epi16(vmadd01, 6); + vmadd23 = _mm256_srai_epi16(vmadd23, 6); + + __m256i packed = _mm256_packus_epi16(vmadd01, vmadd23); + packed = _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm256_storeu_si256((__m256i*)(dst + (y * width)), packed); + } +} + +static void angular_pdpc_hor_w16_avx2(uvg_pixel* dst, const uvg_pixel* ref_side, const int width, const int height, const int scale, const int mode_disp) +{ + int limit = MIN(3 << scale, height); + __m128i v32s = _mm_set1_epi16(32); + + const int inv_angle_offset = mode_disp * 64; + const int16_t* shifted_inv_angle_sum = &intra_pdpc_shifted_inv_angle_sum[inv_angle_offset]; + + // Handle one line at a time. Skip line if vertical limit reached. + for (int y = 0; y < limit; ++y) { + const uint8_t weight1 = 32 >> (2 * y >> scale); + const uint8_t weight0 = 64 - weight1; + ALIGNED(2) const uint8_t tmp[2] = { weight0, weight1 }; + + __m128i vweight = _mm_set1_epi16(*(uint16_t*)tmp); + + for (int x = 0; x < width; x += 16) { + __m128i vdst = _mm_load_si128((__m128i*)(dst + (y * width + x))); + __m128i vtop = _mm_loadu_si128((__m128i*) &ref_side[x + shifted_inv_angle_sum[y] + 1]); + + __m128i vlo = _mm_unpacklo_epi8(vdst, vtop); + __m128i vhi = _mm_unpackhi_epi8(vdst, vtop); + + __m128i vmaddlo = _mm_maddubs_epi16(vlo, vweight); + __m128i vmaddhi = _mm_maddubs_epi16(vhi, vweight); + + vmaddlo = _mm_add_epi16(vmaddlo, v32s); + vmaddhi = _mm_add_epi16(vmaddhi, v32s); + + vmaddlo = _mm_srai_epi16(vmaddlo, 6); + vmaddhi = _mm_srai_epi16(vmaddhi, 6); + + __m128i packed = _mm_packus_epi16(vmaddlo, vmaddhi); + + _mm_storeu_si128((__m128i*)(dst + (y * width + x)), packed); + } + } +} + + +// Prediction mode 50 versions of PDPC functions. 
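+// For pure vertical prediction (mode 50), the PDPC blend reduces to a
+// per-column weight applied to the difference between the left reference
+// and the top-left sample. Scalar sketch (illustrative only; the packus in
+// each function provides the clamp to the pixel range):
+//   const int wL = 32 >> ((2 * x) >> scale);
+//   dst[y * width + x] += (wL * (ref_side[y + 1] - top_left) + 32) >> 6;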
+static void angular_pdpc_mode50_w4_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 4; + int limit = MIN(3 << scale, width); // Not used + + //__m128i vseq = _mm_setr_epi32(0, 1, 2, 3); + //__m128i vidx = _mm_slli_epi32(vseq, 2); // 2 is log2 width + __m256i v32s = _mm256_set1_epi16(32); + + // Scale can be 0, 1 or 2 + const int offset = scale * 16; + const __m256i vweight = _mm256_load_si256((const __m256i*) &intra_pdpc_w4_ver_weight[offset]); + const __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03 + ); + + // For a 4 width block, height must be at least 4. Handle 4 lines at once. + for (int y = 0; y < height; y += 4) { + const uint32_t ref4 = *(uint32_t*)&ref_side[1 + y]; + __m128i vref = _mm_set1_epi32(ref4); + vref = _mm_shuffle_epi8(vref, vshuf); + __m256i vref16 = _mm256_cvtepu8_epi16(vref); + + //__m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width), vseq, 4); + __m128i vdst = _mm_load_si128((const __m128i*)(dst + y * width)); + __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); + + __m256i accu = _mm256_sub_epi16(vref16, vtopleft); + accu = _mm256_mullo_epi16(vweight, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vdst16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + _mm_store_si128((__m128i*)(dst + (y * width)), filtered); + } +} + +static void angular_pdpc_mode50_w8_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int height, const int scale) +{ + const int width = 8; + int limit = MIN(3 << scale, width); // Not used. + + __m256i v32s = _mm256_set1_epi16(32); + + // Scale can be 0, 1 or 2 + const int offset = scale * 16; + const __m256i vweight = _mm256_load_si256((const __m256i*) &intra_pdpc_w8_ver_weight[offset]); + const __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 + ); + + // For width 8, height must be at least 2. Handle 2 lines at once. + for (int y = 0; y < height; y += 2) { + const uint16_t ref2 = *(uint16_t*)&ref_side[1 + y]; + __m128i vref = _mm_set1_epi16(ref2); + vref = _mm_shuffle_epi8(vref, vshuf); + __m256i vref16 = _mm256_cvtepu8_epi16(vref); + + __m128i vdst = _mm_load_si128((const __m128i*)(dst + y * width)); + __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); + + __m256i accu = _mm256_sub_epi16(vref16, vtopleft); + accu = _mm256_mullo_epi16(vweight, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vdst16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + _mm_store_si128((__m128i*)(dst + (y * width)), filtered); + } +} + +static void angular_pdpc_mode50_w16_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int width, const int height, const int scale) +{ + int limit = MIN(3 << scale, width); // Not used. 
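+ // Note: the limit is not actually needed here. Since
+ // wL = 32 >> ((2 * x) >> scale) is already zero for x >= limit, whole
+ // vectors can be processed unconditionally.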
+ + __m256i v32s = _mm256_set1_epi16(32); + + const int offset = scale * 16; + const __m256i vweight = _mm256_load_si256((const __m256i*) &intra_pdpc_w16_ver_weight[offset]); + const __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + for (int y = 0; y < height; ++y) { + __m256i vref = _mm256_set1_epi16((int16_t)ref_side[1 + y]); + + __m128i vdst = _mm_load_si128((const __m128i*)(dst + y * width)); + __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); + + __m256i accu = _mm256_sub_epi16(vref, vtopleft); + accu = _mm256_mullo_epi16(vweight, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vdst16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + _mm_store_si128((__m128i*)(dst + y * width), filtered); + } +} + +static void angular_pdpc_mode50_scale1_avx2(uvg_pixel* dst, const uvg_pixel top_left, const uvg_pixel* ref_side, const int width, const int height) +{ + //const int scale = 1; + //int limit = MIN(3 << scale, width); // Not used. + + __m256i v32s = _mm256_set1_epi16(32); + + // Scale can be 0, 1 or 2 + const int offset = 16; // scale * 16 + const __m256i vweight = _mm256_load_si256((const __m256i*) & intra_pdpc_w8_ver_weight[offset]); + const __m256i vtopleft = _mm256_set1_epi16((uint16_t)top_left); + + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 + ); + + const int log2w = uvg_g_convert_to_log2[width]; + __m128i vseq = _mm_setr_epi32(0x00, 0x00, 0x01, 0x00); + __m128i vidx = _mm_slli_epi64(vseq, log2w); + + // For width 8, height must be at least 2. Handle 2 lines at once. + for (int y = 0; y < height; y += 2) { + const uint16_t ref2 = *(uint16_t*)&ref_side[1 + y]; + __m128i vref = _mm_set1_epi16(ref2); + vref = _mm_shuffle_epi8(vref, vshuf); + __m256i vref16 = _mm256_cvtepu8_epi16(vref); + + //__m128i vdst = _mm_load_si128((const __m128i*)(dst + y * width)); + __m128i vdst = _mm_i64gather_epi64((const long long int*)(dst + y * width), vidx, 1); + __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); + + __m256i accu = _mm256_sub_epi16(vref16, vtopleft); + accu = _mm256_mullo_epi16(vweight, accu); + accu = _mm256_add_epi16(accu, v32s); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vdst16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + __m128i filtered = _mm_packus_epi16(lo, hi); + + //_mm_store_si128((__m128i*)(dst + (y * width)), filtered); + *(uint64_t*)(dst + ((y + 0) * width)) = _mm_extract_epi64(filtered, 0); + *(uint64_t*)(dst + ((y + 1) * width)) = _mm_extract_epi64(filtered, 1); + } +} + +// The main angular prediction entry point for AVX2. +/** + * \brief AVX2 version of angular intra prediction. + * \param cu_loc CU location and size data. + * \param intra_mode Intra prediction mode. + * \param channel_type Color channel. + * \param in_ref_above Pointer to -1 index of above reference. + * \param in_ref_left Pointer to -1 index of left reference. + * \param dst Buffer of size MAX_PRED_WIDTH * MAX_PRED_WIDTH. + * \param multi_ref_idx Multi reference index. + * \param isp_mode Intra sub-partition mode. + * \param cu_dim CU dimension, used along ISP mode. 
+ */
+static void uvg_angular_pred_avx2(
+ const cu_loc_t* const cu_loc,
+ const int_fast8_t intra_mode,
+ const int_fast8_t channel_type,
+ const uvg_pixel* const in_ref_above,
+ const uvg_pixel* const in_ref_left,
+ uvg_pixel* const dst,
+ const uint8_t multi_ref_idx,
+ const uint8_t isp_mode,
+ const int cu_dim)
+{
+ // ISP_TODO: non-square block implementation
+ int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
+ int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+ const int log2_width = uvg_g_convert_to_log2[width];
+ const int log2_height = uvg_g_convert_to_log2[height];
+
+ assert((log2_width >= 2 && log2_width <= 6) && (log2_height >= 0 && log2_height <= 6));
+
+ // For chroma blocks, height has to be at least 2
+ if (channel_type != COLOR_Y) {
+ assert(log2_height >= 1);
+ }
+
+ // Modes [-14, -1] and [67, 81] are wide angle modes
+ assert(intra_mode >= -14 && intra_mode <= 81);
+
+ uint8_t multi_ref_index = channel_type == COLOR_Y ? multi_ref_idx : 0;
+ uint8_t isp = isp_mode;
+
+ static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
+ static const int16_t modedisp2invsampledisp[32] = { 0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, 512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16 }; // (512 * 32) / sampledisp
+ static const int32_t pre_scale[] = { 8, 7, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, -1, -1, -2, -3 };
+
+ // Temporary buffer for modes 11-25.
+ // It only needs to be big enough to hold indices from -width to width-1.
+ uvg_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX];
+ uvg_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX];
+
+ int32_t pred_mode = intra_mode; // ToDo: handle WAIP
+
+ // Whether to swap references to always project on the left reference row.
+ const bool vertical_mode = intra_mode >= 34;
+
+ // The mode's distance to the horizontal or vertical mode. Possible values: [-16, 16]
+ // For pure vertical or horizontal modes, this is 0. For pure diagonal modes, this is either -16 or 16.
+ const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -(pred_mode - 18);
+ const int_fast8_t abs_mode_disp = abs(mode_disp);
+ const bool wide_angle_mode = mode_disp > 16;
+
+ // Sample displacement per column in fractions of 32.
+ const int_fast16_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs_mode_disp];
+
+ const int side_size = vertical_mode ? log2_height : log2_width;
+ int scale = MIN(2, side_size - pre_scale[abs_mode_disp]);
+
+ // Pointer for the reference we are interpolating from.
+ uvg_pixel* ref_main;
+ // Pointer for the other reference.
+ const uvg_pixel* ref_side;
+
+ const int top_ref_length = isp_mode == ISP_MODE_VER ? width + cu_dim : width << 1;
+ const int left_ref_length = isp_mode == ISP_MODE_HOR ? height + cu_dim : height << 1;
+
+ // Set ref_main and ref_side such that, when indexed with 0, they point to
+ // index 0 in block coordinates.
+ if (sample_disp < 0) {
+ // In cases where sample_disp is negative, references are needed from both sides.
+ // This step combines the main and side reference.
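+ // Resulting layout, assuming vertical_mode (mirrored otherwise):
+ // temp_main[0 .. height-1] will receive samples projected from the left
+ // reference (filled by the switch below), reachable as ref_main[-i] for
+ // i = 1 .. height, while temp_main[height ..] already holds the top
+ // reference with the top-left sample at ref_main[0].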
+ if (vertical_mode) { + memcpy(&temp_main[height], in_ref_above, (width + 2 + multi_ref_index) * sizeof(uvg_pixel)); + } + else { + memcpy(&temp_main[width], in_ref_left, (height + 2 + multi_ref_index) * sizeof(uvg_pixel)); + } + //memcpy(&temp_main[height], &in_ref_above[0], (width + 2 + multi_ref_index) * sizeof(uvg_pixel)); + //memcpy(&temp_side[width], &in_ref_left[0], (height + 2 + multi_ref_index) * sizeof(uvg_pixel)); + + ref_main = vertical_mode ? &temp_main[height] : &temp_main[width]; + ref_side = vertical_mode ? in_ref_left : in_ref_above; + + int size_side = vertical_mode ? height : width; + switch (size_side) { + case 4: + { + int shuf_offset = abs_mode_disp * 16; + __m128i vshuf = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_4[shuf_offset]); + __m128i vref = _mm_loadu_si128((const __m128i*) &ref_side[0]); + vref = _mm_shuffle_epi8(vref, vshuf); + /*uint32_t tmp = _mm_extract_epi32(vref, 0); + memcpy(&temp_main[0], &tmp, sizeof(uint32_t));*/ + _mm_maskstore_epi32((int32_t*)&temp_main[0], _mm_setr_epi32(0xffffffff, 0, 0, 0), vref); + break; + } + case 8: + { + int shuf_offset = abs_mode_disp * 16; + __m128i vshuf = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_8[shuf_offset]); + __m128i vref = _mm_loadu_si128((const __m128i*) &ref_side[0]); + vref = _mm_shuffle_epi8(vref, vshuf); + /*uint64_t tmp = _mm_extract_epi64(vref, 0); + memcpy(&temp_main[0], &tmp, sizeof(uint64_t));*/ + _mm_maskstore_epi32((int32_t*)&temp_main[0], _mm_setr_epi32(0xffffffff, 0xffffffff, 0, 0), vref); + break; + } + case 16: + { + int shuf_offset = abs_mode_disp * 16; + __m128i vshuf = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_16[shuf_offset]); + __m128i vref = _mm_loadu_si128((const __m128i*) &ref_side[1]); // Offset ref by one to fit all necessary 16 refs. Offset accounted for in shuffle vectors. + vref = _mm_shuffle_epi8(vref, vshuf); + _mm_store_si128((__m128i*) &temp_main[0], vref); + break; + } + case 32: + { + int shuf_offset = abs_mode_disp * 32; + __m128i vshufhi = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_32[shuf_offset + 0]); + __m128i vshuflo = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_32[shuf_offset + 16]); + __m128i vblend = _mm_cmpgt_epi8(vshuflo, _mm_set1_epi8(15)); + + __m128i vreflo = _mm_loadu_si128((const __m128i*) & ref_side[1]); // Offset ref by one to fit all necessary 16 refs. Offset accounted for in shuffle vectors. 
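+ // Shuffle indices greater than 15 refer to bytes that live in vrefhi;
+ // vblend marks those lanes so the blend below can stitch the two 16-byte
+ // source registers together.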
+ __m128i vrefhi = _mm_loadu_si128((const __m128i*) & ref_side[17]); + + // Second half of references requires samples from both sides + __m128i vreftmphi = _mm_shuffle_epi8(vrefhi, vshuflo); + __m128i vreftmplo = _mm_shuffle_epi8(vreflo, vshuflo); + vreflo = _mm_blendv_epi8(vreftmplo, vreftmphi, vblend); + + // First half of references use references from the hi side only + vrefhi = _mm_shuffle_epi8(vrefhi, vshufhi); + + _mm_store_si128((__m128i*) &temp_main[0], vrefhi); + _mm_store_si128((__m128i*) &temp_main[16], vreflo); + break; + } + case 64: + { + int shuf_offset = abs_mode_disp * 64; + __m128i vshuf0 = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_64[shuf_offset + 0]); + __m128i vshuf1 = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_64[shuf_offset + 16]); + __m128i vshuf2 = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_64[shuf_offset + 32]); + __m128i vshuf3 = _mm_load_si128((__m128i*) &intra_refbuild_shuffle_vectors_sidesize_64[shuf_offset + 48]); + + __m128i vref0 = _mm_loadu_si128((const __m128i*) &ref_side[ 0 + 1]); // Offset ref by one to fit all necessary 16 refs. Offset accounted for in shuffle vectors. + __m128i vref1 = _mm_loadu_si128((const __m128i*) &ref_side[16 + 1]); + __m128i vref2 = _mm_loadu_si128((const __m128i*) &ref_side[32 + 1]); + __m128i vref3 = _mm_loadu_si128((const __m128i*) &ref_side[48 + 1]); + + // First quarter of references use references from vref3 only + __m128i vrefout0 = _mm_shuffle_epi8(vref3, vshuf0); + + // Second quarter can require samples from vref3 and vref2 + __m128i vreftmp0 = _mm_shuffle_epi8(vref3, vshuf1); + __m128i vreftmp1 = _mm_shuffle_epi8(vref2, vshuf1); + __m128i vblend0 = _mm_cmpgt_epi8(vshuf1, _mm_set1_epi8(47)); + __m128i vrefout1 = _mm_blendv_epi8(vreftmp1, vreftmp0, vblend0); + + // Third quarter can require samples from vref3, vref2 and vref1 + vreftmp0 = _mm_shuffle_epi8(vref3, vshuf2); + vreftmp1 = _mm_shuffle_epi8(vref2, vshuf2); + __m128i vreftmp2 = _mm_shuffle_epi8(vref1, vshuf2); + vblend0 = _mm_cmpgt_epi8(vshuf2, _mm_set1_epi8(47)); + __m128i vblend1 = _mm_cmpgt_epi8(vshuf2, _mm_set1_epi8(31)); + + vreftmp0 = _mm_blendv_epi8(vreftmp1, vreftmp0, vblend0); + __m128i vrefout2 = _mm_blendv_epi8(vreftmp2, vreftmp0, vblend1); + + // Fourth quarter can require samples from vref3, vref2, vref1 and vref0 + vreftmp0 = _mm_shuffle_epi8(vref3, vshuf3); + vreftmp1 = _mm_shuffle_epi8(vref2, vshuf3); + vreftmp2 = _mm_shuffle_epi8(vref1, vshuf3); + __m128i vreftmp3 = _mm_shuffle_epi8(vref0, vshuf3); + + vblend0 = _mm_cmpgt_epi8(vshuf3, _mm_set1_epi8(47)); + vblend1 = _mm_cmpgt_epi8(vshuf3, _mm_set1_epi8(31)); + __m128i vblend2 = _mm_cmpgt_epi8(vshuf3, _mm_set1_epi8(15)); + + vreftmp0 = _mm_blendv_epi8(vreftmp1, vreftmp0, vblend0); + vreftmp0 = _mm_blendv_epi8(vreftmp2, vreftmp0, vblend1); + __m128i vrefout3 = _mm_blendv_epi8(vreftmp3, vreftmp0, vblend2); + + _mm_store_si128((__m128i*) &temp_main[0], vrefout0); + _mm_store_si128((__m128i*) &temp_main[16], vrefout1); + _mm_store_si128((__m128i*) &temp_main[32], vrefout2); + _mm_store_si128((__m128i*) &temp_main[48], vrefout3); + break; + } + default: + // This should work in the case everything else fails. 
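+ // The scalar fallback projects block coordinate -i onto the side reference
+ // using the inverse angle. Since modedisp2invsampledisp is (512 * 32) /
+ // sampledisp, the expression (-i * invAngle + 256) >> 9 is a rounded
+ // division by 512, clamped to size_side. E.g. for abs_mode_disp == 2
+ // (invAngle 8192): i = 1 -> 16, i = 2 -> 32 (before the MIN clamp).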
+ const int modedisp2invsampledisp_abs = modedisp2invsampledisp[abs_mode_disp];
+ for (int i = -size_side; i <= -1; i++) {
+ ref_main[i] = ref_side[MIN((-i * modedisp2invsampledisp_abs + 256) >> 9, size_side)];
+ }
+ }
+ }
+ else {
+ memcpy(&temp_main[0], &in_ref_above[0], (top_ref_length + 1 + multi_ref_index) * sizeof(uvg_pixel));
+ memcpy(&temp_side[0], &in_ref_left[0], (left_ref_length + 1 + multi_ref_index) * sizeof(uvg_pixel));
+
+ ref_main = vertical_mode ? temp_main : temp_side;
+ ref_side = vertical_mode ? temp_side : temp_main;
+
+ const int log2_ratio = log2_width - log2_height;
+ const int s = MAX(0, vertical_mode ? log2_ratio : -log2_ratio);
+ const int max_index = (multi_ref_index << s) + 2;
+ int ref_length;
+ if (isp_mode) {
+ ref_length = vertical_mode ? top_ref_length : left_ref_length;
+ }
+ else {
+ ref_length = vertical_mode ? width << 1 : height << 1;
+ }
+ const uvg_pixel val = ref_main[ref_length + multi_ref_index];
+ for (int j = 1; j <= max_index; j++) {
+ ref_main[ref_length + multi_ref_index + j] = val;
+ }
+ }
+
+ // Compensate for line offset in reference line buffers.
+ ref_main += multi_ref_index;
+ ref_side += multi_ref_index;
+ //if (!vertical_mode) { SWAP(width, height, int) }
+
+ static const int uvg_intra_hor_ver_dist_thres[8] = { 24, 24, 24, 14, 2, 0, 0, 0 };
+ int filter_threshold = uvg_intra_hor_ver_dist_thres[(log2_width + log2_height) >> 1];
+ int dist_from_vert_or_hor = MIN(abs((int32_t)pred_mode - 50), abs((int32_t)pred_mode - 18));
+
+ bool use_cubic = true; // Default to cubic filter
+ if (dist_from_vert_or_hor > filter_threshold) {
+ if ((abs(sample_disp) & 0x1F) != 0)
+ {
+ use_cubic = false;
+ }
+ }
+ // Cubic must be used if ref line != 0 or if isp mode != 0
+ if (multi_ref_index || isp) {
+ use_cubic = true;
+ }
+
+ //const int8_t* pfilter = use_cubic ? &cubic_filter_8bit_c[0][0] : &cubic_filter_8bit_g[0][0];
+ const int8_t (*pfilter)[4] = use_cubic ? cubic_filter_8bit_c : cubic_filter_8bit_g;
+
+ if (sample_disp != 0) {
+ // The mode is not horizontal or vertical; we have to do interpolation.
+
+ // Set delta table pointers
+ const int table_offset = wide_angle_mode ? (pred_mode < 2 ? (pred_mode + 13) * DELTA_TABLE_ROW_LENGTH : (81 - pred_mode) * DELTA_TABLE_ROW_LENGTH) : (pred_mode <= 34 ? (pred_mode - 2) * DELTA_TABLE_ROW_LENGTH : (66 - pred_mode) * DELTA_TABLE_ROW_LENGTH);
+ const int16_t* delta_int = wide_angle_mode ? &delta_int_wide_angle_table[table_offset] : &delta_int_table[table_offset];
+ delta_int += multi_ref_index; // TODO: These tables are not necessarily large enough for 64 dimension blocks.
+ const int16_t* delta_fract = wide_angle_mode ? &delta_fract_wide_angle_table[table_offset] : &delta_fract_table[table_offset];
+ delta_fract += multi_ref_index;
+
+ // Check if the angle is fractional. If yes, interpolation is needed.
+ if ((abs(sample_disp) & 0x1F) != 0) {
+
+ // Luma Channel
+ if (channel_type == 0) {
+ if (vertical_mode) {
+ switch (width) {
+ case 4: angular_pred_w4_ver_avx2(dst, ref_main, delta_int, delta_fract, height, pfilter); break;
+ case 8:
+ if (height < 4)
+ angular_pred_w8_h2_ver_avx2(dst, ref_main, delta_int, delta_fract, height, pfilter);
+ else
+ angular_pred_w8_ver_avx2(dst, ref_main, delta_int, delta_fract, height, pfilter);
+ break;
+ case 16: // Use w16 function for all widths 16 and up
+ case 32:
+ case 64: angular_pred_w16_ver_avx2(dst, ref_main, delta_int, delta_fract, width, height, pfilter); break;
+ default:
+ assert(false && "Intra angular prediction: illegal width.\n");
+ break;
+ }
+ }
+ else {
+ switch (width) {
+ case 4:
+ if (pred_mode < -7 || (multi_ref_index == 2 && pred_mode == -7)) // High angles need special handling
+ angular_pred_w4_hor_high_angle_avx2(dst, ref_main, delta_int, delta_fract, height, pfilter);
+ else
+ angular_pred_w4_hor_avx2(dst, ref_main, pred_mode, multi_ref_index, delta_int, delta_fract, height, pfilter);
+
+ break;
+ case 8:
+ if (pred_mode < -2)
+ angular_pred_w8_hor_high_angle_avx2(dst, ref_main, delta_int, delta_fract, height, pfilter);
+ else
+ angular_pred_w8_hor_avx2(dst, ref_main, pred_mode, multi_ref_index, delta_int, delta_fract, height, pfilter);
+
+ break;
+ case 16:
+ if (pred_mode < 5 || pred_mode == 33)
+ angular_pred_w16_hor_high_angle_avx2(dst, ref_main, delta_int, delta_fract, width, height, pfilter);
+ else
+ angular_pred_w16_hor_avx2(dst, ref_main, pred_mode, multi_ref_index, delta_int, delta_fract, height, pfilter);
+
+ break;
+ case 32:
+ if (pred_mode < 5 || pred_mode == 33)
+ angular_pred_w16_hor_high_angle_avx2(dst, ref_main, delta_int, delta_fract, width, height, pfilter);
+ else
+ angular_pred_w32_hor_avx2(dst, ref_main, pred_mode, multi_ref_index, delta_int, delta_fract, width, height, pfilter);
+
+ break;
+ case 64:
+ if (pred_mode < 5 || pred_mode == 33)
+ angular_pred_w16_hor_high_angle_avx2(dst, ref_main, delta_int, delta_fract, width, height, pfilter);
+ else
+ angular_pred_w32_hor_avx2(dst, ref_main, pred_mode, multi_ref_index, delta_int, delta_fract, width, height, pfilter);
+
+ break;
+ default:
+ assert(false && "Intra angular prediction: illegal width.\n");
+ break;
+ }
+ }
+ }
+ // Chroma channels
+ else {
+ // Do 2-tap linear filtering for chroma channels
+
+ if (vertical_mode) {
+ switch (width) {
+ // No wide angle handling for w4 is needed.
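+ // The 2-tap filter evaluated by these helpers is, in scalar form
+ // (illustrative only; exact indexing is internal to the helpers):
+ //   const int d = delta_int[y], f = delta_fract[y];
+ //   out = ((32 - f) * ref_main[x + d + 1] + f * ref_main[x + d + 2] + 16) >> 5;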
+ case 4: angular_pred_linear_filter_w4_ver_avx2(dst, ref_main, height, delta_int, pred_mode); break;
+ case 8: angular_pred_linear_filter_w8_ver_avx2(dst, ref_main, height, delta_int, pred_mode); break;
+ case 16: angular_pred_linear_filter_w16_ver_avx2(dst, ref_main, height, delta_int, pred_mode); break;
+ case 32: angular_pred_linear_filter_w32_ver_avx2(dst, ref_main, height, delta_int, pred_mode); break;
+ default:
+ assert(false && "Intra angular prediction: illegal chroma width.\n");
+ break;
+ }
+ }
+ else {
+ if (wide_angle_mode) {
+ switch (width) {
+ case 4: angular_pred_linear_filter_w4_hor_wide_angle_avx2(dst, ref_main, height, pred_mode, delta_int); break;
+ case 8: angular_pred_linear_filter_w8_hor_wide_angle_avx2(dst, ref_main, height, pred_mode, delta_int, delta_fract); break;
+ case 16: angular_pred_linear_filter_w16_hor_wide_angle_avx2(dst, ref_main, height, pred_mode, delta_int, delta_fract); break;
+ case 32: assert(false && "This code branch only works with UVG_FORMAT_P420."); break; // This branch is never executed with UVG_FORMAT_P420, since chroma blocks are at most 32 in width or height.
+ default:
+ assert(false && "Intra angular prediction: illegal chroma width.\n");
+ break;
+ }
+ }
+ else {
+ switch (width) {
+ case 4: angular_pred_linear_filter_w4_hor_avx2(dst, ref_main, height, pred_mode, delta_int); break;
+ case 8: angular_pred_linear_filter_w8_hor_avx2(dst, ref_main, height, pred_mode, delta_int); break;
+ case 16: angular_pred_linear_filter_w16_hor_avx2(dst, ref_main, height, pred_mode, delta_int); break;
+ case 32: angular_pred_linear_filter_w32_hor_avx2(dst, ref_main, height, pred_mode, delta_int); break;
+ default:
+ assert(false && "Intra angular prediction: illegal chroma width.\n");
+ break;
+ }
+ }
+ }
+ }
+ }
+ else {
+ // No interpolation or filtering needed, just copy the integer samples.
+ if (vertical_mode) {
+ angular_pred_non_fractional_angle_pxl_copy_ver_avx2(dst, ref_main, width, height, delta_int);
+ }
+ else {
+ if (pred_mode == 2) {
+ switch (width) {
+ // Note: these functions do not need the delta int table as the mode is known
+ case 4: angular_pred_non_fractional_angle_pxl_copy_w4_mode2_hor_avx2(dst, ref_main, height, multi_ref_index); break;
+ case 8: angular_pred_non_fractional_angle_pxl_copy_w8_mode2_hor_avx2(dst, ref_main, height, multi_ref_index); break;
+ // Cases 16 onward can be solved with a simple memcpy
+ case 16:
+ for (int y = 0; y < height; ++y) {
+ // Offset indices by one since index 0 is top left and plus one since delta_int[0] for mode 2 is 1.
+ memcpy(&dst[y * 16], &ref_main[2 + y + multi_ref_index], 16 * sizeof(uvg_pixel));
+ }
+ break;
+ case 32:
+ for (int y = 0; y < height; ++y) {
+ memcpy(&dst[y * 32], &ref_main[2 + y + multi_ref_index], 32 * sizeof(uvg_pixel));
+ }
+ break;
+ case 64:
+ for (int y = 0; y < height; ++y) {
+ memcpy(&dst[y * 64], &ref_main[2 + y + multi_ref_index], 64 * sizeof(uvg_pixel));
+ }
+ break;
+ default:
+ assert(false && "Intra angular prediction: illegal width.\n");
+ break;
+ }
+
+ }
+ else {
+ // Wide angle modes -12, -10, -8 and -4
+ switch (width) {
+ case 4: angular_pred_non_fractional_angle_pxl_copy_w4_wide_angle_hor_avx2(dst, ref_main, height, delta_int); break;
+ case 8: angular_pred_non_fractional_angle_pxl_copy_w8_wide_angle_hor_avx2(dst, ref_main, height, delta_int); break;
+ case 16: angular_pred_non_fractional_angle_pxl_copy_w16_wide_angle_hor_avx2(dst, ref_main, height, delta_int); break;
+ case 32: angular_pred_non_fractional_angle_pxl_copy_w32_wide_angle_hor_avx2(dst, ref_main, height, delta_int); break;
+ // Width 64 never goes into this branch. Leave an assert here to catch future problems.
+ case 64:
+ //angular_pred_non_fractional_angle_pxl_copy_hor_avx2(dst, ref_main, width, height, delta_int); break;
+ assert(false && "Intra angular prediction: Non-fractional angle pixel copy with width 64. This should never happen.\n");
+ break;
+ default:
+ assert(false && "Intra angular prediction: illegal width.\n");
+ break;
+ }
+ }
+ }
+ }
+ else {
+ // Mode is horizontal or vertical, just copy the pixels.
+ if (vertical_mode) {
+ for (int_fast32_t y = 0; y < height; ++y) {
+ switch (width) {
+ case 4: memcpy(&dst[y * 4], &ref_main[1], 4 * sizeof(uvg_pixel)); break;
+ case 8: memcpy(&dst[y * 8], &ref_main[1], 8 * sizeof(uvg_pixel)); break;
+ case 16: memcpy(&dst[y * 16], &ref_main[1], 16 * sizeof(uvg_pixel)); break;
+ case 32: memcpy(&dst[y * 32], &ref_main[1], 32 * sizeof(uvg_pixel)); break;
+ case 64: memcpy(&dst[y * 64], &ref_main[1], 64 * sizeof(uvg_pixel)); break;
+ }
+ }
+ }
+ else {
+ #define UNROLL(w, h) \
+ if ((h) == height && (w) == width) { \
+ for (int y = 0; y < (h); ++y) { \
+ const __m128i vdst = _mm_set1_epi8(ref_main[y + 1]); \
+ switch ((w)) {\
+ case 4: _mm_storeu_si32((__m128i*) &dst[y * 4], vdst); break;\
+ case 8: _mm_storeu_si64((__m128i*) &dst[y * 8], vdst); break;\
+ case 16: _mm_store_si128((__m128i*) &dst[y * 16], vdst); break;\
+ case 32:\
+ _mm_store_si128((__m128i*) &dst[y * 32 + 0], vdst);\
+ _mm_store_si128((__m128i*) &dst[y * 32 + 16], vdst);\
+ break;\
+ case 64: \
+ _mm_store_si128((__m128i*) &dst[y * 64 + 0], vdst);\
+ _mm_store_si128((__m128i*) &dst[y * 64 + 16], vdst);\
+ _mm_store_si128((__m128i*) &dst[y * 64 + 32], vdst);\
+ _mm_store_si128((__m128i*) &dst[y * 64 + 48], vdst);\
+ break; \
+ default:\
+ assert(false && "Intra angular prediction: illegal width.\n");\
+ break;\
+ }\
+ } \
+ }
+ UNROLL(4, 4);
+ UNROLL(4, 8);
+ UNROLL(4, 16);
+ UNROLL(4, 32);
+ UNROLL(4, 64);
+ UNROLL(8, 2);
+ UNROLL(8, 4);
+ UNROLL(8, 8);
+ UNROLL(8, 16);
+ UNROLL(8, 32);
+ UNROLL(8, 64);
+ UNROLL(16, 1);
+ UNROLL(16, 2);
+ UNROLL(16, 4);
+ UNROLL(16, 8);
+ UNROLL(16, 16);
+ UNROLL(16, 32);
+ UNROLL(16, 64);
+ UNROLL(32, 1);
+ UNROLL(32, 2);
+ UNROLL(32, 4);
+ UNROLL(32, 8);
+ UNROLL(32, 16);
+ UNROLL(32, 32);
+ UNROLL(32, 64);
+ UNROLL(64, 1);
+ UNROLL(64, 2);
+ UNROLL(64, 4);
+ UNROLL(64, 8);
+ UNROLL(64, 16);
+ UNROLL(64, 32);
+ UNROLL(64, 64);
+ #undef UNROLL
+ }
+ }
+
+
+ bool PDPC_filter = (width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH && multi_ref_index ==
0); + if (pred_mode > 1 && pred_mode < 67) { + // Disable PDPC filter if both references are used or if MRL is used + if (mode_disp < 0 || multi_ref_index) { + PDPC_filter = false; + } + else if (mode_disp > 0) { + // If scale is negative, PDPC filtering has no effect, therefore disable it. + PDPC_filter &= (scale >= 0); + } + } + if (PDPC_filter) { + // Handle pure horizontal and vertical with separate PDPC solution + if (pred_mode == 18) { + scale = (log2_width + log2_height - 2) >> 2; + const uvg_pixel top_left = ref_main[0]; + + switch (width) { + case 4: angular_pdpc_mode18_w4_avx2(dst, top_left, ref_side, height, scale); break; + case 8: angular_pdpc_mode18_w8_avx2(dst, top_left, ref_side, height, scale); break; + case 16: angular_pdpc_mode18_w16_avx2(dst, top_left, ref_side, height, scale); break; + case 32: angular_pdpc_mode18_w32_avx2(dst, top_left, ref_side, height, scale); break; + case 64: angular_pdpc_mode18_w64_avx2(dst, top_left, ref_side, height, scale); break; + default: + assert(false && "Intra PDPC, invalid width.\n"); + break; + } + } + else if (pred_mode == 50) { + scale = (log2_width + log2_height - 2) >> 2; + const uvg_pixel top_left = ref_main[0]; + switch (width) { + case 4: angular_pdpc_mode50_w4_avx2(dst, top_left, ref_side, height, scale); break; + case 8: angular_pdpc_mode50_w8_avx2(dst, top_left, ref_side, height, scale); break; + case 16: // 16 and higher handled by same functions. + case 32: + case 64: + if (scale == 1) { + angular_pdpc_mode50_scale1_avx2(dst, top_left, ref_side, width, height); + } + else { + angular_pdpc_mode50_w16_avx2(dst, top_left, ref_side, width, height, scale); + } + break; + default: + assert(false && "Intra PDPC, invalid width.\n"); + break; + } + } + else { + if (vertical_mode) { + // Note: no need to check for negative mode_disp, since it is already checked before. + switch (width) { + case 4: + // Low mode disp -> high angle. For pdpc, this causes the needed references to be extremely sparse making loads without using gathers impossible. + // Handle low angles with more tight reference spacing with separate functions with more optimized loads. + if (mode_disp < 6) + angular_pdpc_ver_w4_high_angle_avx2(dst, ref_side, height, scale, mode_disp); + else + angular_pdpc_ver_w4_avx2(dst, ref_side, height, scale, mode_disp); + break; + case 8: + if (scale == 0) { + if (mode_disp < 6) + angular_pdpc_ver_4x4_scale0_high_angle_avx2(dst, ref_side, width, height, mode_disp); + else + angular_pdpc_ver_4x4_scale0_avx2(dst, ref_side, width, height, mode_disp); + } + else /*if (scale == 1)*/ { + if (mode_disp < 8) + angular_pdpc_ver_8x4_scale1_high_angle_avx2(dst, ref_side, width, height, mode_disp); + else + angular_pdpc_ver_8x4_scale1_avx2(dst, ref_side, width, height, mode_disp); + } + // This branch was never executed. There is no case where width == 8 and scale == 2 and PDPC is enabled. 
+ /*else {
+ if (mode_disp < 10)
+ angular_pdpc_ver_w8_high_angle_avx2(dst, ref_side, height, mode_disp);
+ else
+ angular_pdpc_ver_8x2_scale2_avx2(dst, ref_side, width, height, mode_disp);
+ }*/
+ break;
+ case 16: // 16 width and higher done with the same functions
+ case 32:
+ case 64:
+ switch (scale) {
+ case 0:
+ if (mode_disp < 6)
+ angular_pdpc_ver_4x4_scale0_high_angle_avx2(dst, ref_side, width, height, mode_disp);
+ else
+ angular_pdpc_ver_4x4_scale0_avx2(dst, ref_side, width, height, mode_disp);
+ break;
+ case 1:
+ if (mode_disp < 8)
+ angular_pdpc_ver_8x4_scale1_high_angle_avx2(dst, ref_side, width, height, mode_disp);
+ else
+ angular_pdpc_ver_8x4_scale1_avx2(dst, ref_side, width, height, mode_disp);
+ break;
+ case 2:
+ if (mode_disp < 14)
+ angular_pdpc_ver_w16_high_angle_avx2(dst, ref_side, width, height, mode_disp);
+ else
+ angular_pdpc_ver_w16_scale2_avx2(dst, ref_side, width, height, mode_disp);
+ break;
+ default:
+ assert(false && "Intra PDPC: Invalid scale.\n");
+ }
+ break;
+ default:
+ assert(false && "Intra PDPC: Invalid width.\n");
+ }
+ }
+ else {
+ switch (width) {
+ case 4:
+ // Low mode disp -> high angle. For pdpc, this causes the needed references to be extremely sparse making loads without using gathers impossible.
+ // Handle low angles with more tight reference spacing with separate functions with more optimized loads.
+ /*if (mode_disp < 6)
+ angular_pdpc_hor_w4_high_angle_improved_avx2(dst, ref_side, height, scale, mode_disp);
+ else*/
+ // The commented-out branch above was never reached: there is no case where width == 4 and mode disp < 6 for horizontal modes where PDPC is enabled.
+ angular_pdpc_hor_w4_avx2(dst, ref_side, height, scale, mode_disp);
+ break;
+ case 8: angular_pdpc_hor_w8_avx2(dst, ref_side, height, scale, mode_disp); break;
+ case 16: // 16 width and higher done with the same function
+ case 32:
+ case 64: angular_pdpc_hor_w16_avx2(dst, ref_side, width, height, scale, mode_disp); break;
+ default:
+ assert(false && "Intra PDPC: Invalid width.\n");
+ }
+ }
+ }
+ }
+}
+
+
+typedef void (intra_planar_half_func)(const uvg_pixel* ref_main, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst);
+
+// w1 and w2 for planar horizontal do not exist, since intra prediction must be at least of width 4.
+// Also worth noting is that the minimum number of samples must be 16,
+// therefore the smallest possible predictions are 4x4, 8x2 and 16x1.
+static void intra_pred_planar_hor_w4(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst)
+{
+ const __m256i v_last_ref = _mm256_set1_epi16(ref_side[4 + 1]);
+
+ const __m256i v_ref_coeff = _mm256_setr_epi16(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
+ const __m256i v_last_ref_coeff = _mm256_setr_epi16(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
+
+ const __m256i v_last_ref_mul = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff);
+ const __m256i shuffle_mask = _mm256_setr_epi8(0, -1, 0, -1, 0, -1, 0, -1, 8, -1, 8, -1, 8, -1, 8, -1, 0, -1, 0, -1, 0, -1, 0, -1, 8, -1, 8, -1, 8, -1, 8, -1);
+
+ // Handle 4 lines at a time
+ #define UNROLL_LOOP(num) \
+ for (int i = 0, d = 0; i < (num); i += 4, ++d) { \
+ /* | ref1 | ref2 | ref3 | ref4 | Don't care*/ \
+ __m128i v_ref_0 = _mm_loadu_si128((__m128i const*)& ref[i + 1]); \
+ /* | ref1 | 0 * 7 | ref2 | 0 * 7 | ref3 | 0 * 7 | ref4 | 0* 7 | */ \
+ __m256i v_ref = _mm256_cvtepu8_epi64(v_ref_0); \
+ /* | ref1_l | ref1_h | ref1_l | ref1_h | ...
*/ \ + v_ref = _mm256_shuffle_epi8(v_ref, shuffle_mask); \ + \ + __m256i v_tmp = _mm256_mullo_epi16(v_ref, v_ref_coeff); \ + \ + dst[d] = _mm256_add_epi16(v_last_ref_mul, v_tmp); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_hor_w8(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi16(ref_side[8 + 1]); + + const __m256i v_ref_coeff = _mm256_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0); + const __m256i v_last_ref_coeff = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + const __m256i v_last_ref_mul = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff); + + // Handle 2 lines at a time + #define UNROLL_LOOP(num) \ + for (int i = 0, d = 0; i < (num); i += 2, ++d) { \ + __m128i v_ref0 = _mm_set1_epi16(ref[i + 1]); \ + __m128i v_ref1 = _mm_set1_epi16(ref[i + 2]); \ + \ + __m256i v_ref = _mm256_castsi128_si256(v_ref0); \ + v_ref = _mm256_inserti128_si256(v_ref, v_ref1, 1); \ + \ + __m256i v_tmp = _mm256_mullo_epi16(v_ref, v_ref_coeff); \ + \ + dst[d] = _mm256_add_epi16(v_last_ref_mul, v_tmp); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_hor_w16(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi16(ref_side[16 + 1]); + + const __m256i v_ref_coeff = _mm256_setr_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + const __m256i v_last_ref_coeff = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + const __m256i v_last_ref_mul = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff); + + #define UNROLL_LOOP(num) \ + for (int i = 0, d = 0; i < (num); ++i, ++d) { \ + __m256i v_ref = _mm256_set1_epi16(ref[i + 1]); \ + __m256i v_tmp = _mm256_mullo_epi16(v_ref, v_ref_coeff); \ + dst[d] = _mm256_add_epi16(v_last_ref_mul, v_tmp); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_hor_w32(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi16(ref_side[32 + 1]); + + const __m256i v_ref_coeff0 = _mm256_setr_epi16(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16); + const __m256i v_ref_coeff1 = _mm256_setr_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + const __m256i v_last_ref_coeff0 = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + const __m256i v_last_ref_coeff1 = _mm256_setr_epi16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32); + + const __m256i v_last_ref_mul0 = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff0); + const __m256i v_last_ref_mul1 = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff1); + + #define UNROLL_LOOP(num) \ + for (int i = 0, d = 0; i < (num); ++i, d += 2) { \ + __m256i v_ref = _mm256_set1_epi16(ref[i + 1]); \ + __m256i v_tmp0 = _mm256_mullo_epi16(v_ref, v_ref_coeff0); \ + __m256i v_tmp1 = _mm256_mullo_epi16(v_ref, v_ref_coeff1); \ + dst[d + 0] = _mm256_add_epi16(v_last_ref_mul0, v_tmp0); \ + dst[d + 1] = _mm256_add_epi16(v_last_ref_mul1, v_tmp1); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_hor_w64(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi16(ref_side[64 + 1]); + + const __m256i v_ref_coeff0 = _mm256_setr_epi16(63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48); + const __m256i v_ref_coeff1 = _mm256_setr_epi16(47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32); + const __m256i v_ref_coeff2 = _mm256_setr_epi16(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16); + const __m256i v_ref_coeff3 = _mm256_setr_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + const __m256i v_last_ref_coeff0 = _mm256_setr_epi16( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + const __m256i v_last_ref_coeff1 = _mm256_setr_epi16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + const __m256i v_last_ref_coeff2 = _mm256_setr_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48); + const __m256i v_last_ref_coeff3 = _mm256_setr_epi16(49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + + const __m256i v_last_ref_mul0 = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff0); + const __m256i v_last_ref_mul1 = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff1); + const __m256i v_last_ref_mul2 = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff2); + const __m256i v_last_ref_mul3 = _mm256_mullo_epi16(v_last_ref, v_last_ref_coeff3); + + for (int i = 0, d = 0; i < line; ++i, d += 4) { + __m256i v_ref = _mm256_set1_epi16(ref[i + 1]); + __m256i v_tmp0 = _mm256_mullo_epi16(v_ref, v_ref_coeff0); + __m256i v_tmp1 = _mm256_mullo_epi16(v_ref, v_ref_coeff1); + __m256i v_tmp2 = _mm256_mullo_epi16(v_ref, v_ref_coeff2); + __m256i v_tmp3 = _mm256_mullo_epi16(v_ref, v_ref_coeff3); + dst[d + 0] = _mm256_add_epi16(v_last_ref_mul0, v_tmp0); + dst[d + 1] = _mm256_add_epi16(v_last_ref_mul1, v_tmp1); + dst[d + 2] = _mm256_add_epi16(v_last_ref_mul2, v_tmp2); + dst[d + 3] = _mm256_add_epi16(v_last_ref_mul3, v_tmp3); + } +} + +static void intra_pred_planar_ver_w4(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi8(ref_side[line + 1]); + + // Overflow possible for this width if line > 32 + const bool overflow = line > 32; + + // Got four 8-bit references, or 32 bits of data. Duplicate to fill a whole 256-bit vector. 
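/*
 * Taken together, the horizontal halves above and the vertical halves below
 * implement the standard VVC planar interpolation. A scalar sketch of the
 * full prediction, assuming the reference layout used in this file (index 0
 * holds the top-left sample):
 *
 *   const int top_right   = ref_top[width + 1];
 *   const int bottom_left = ref_left[height + 1];
 *   for (int y = 0; y < height; ++y) {
 *     for (int x = 0; x < width; ++x) {
 *       const int hor = (width  - 1 - x) * ref_left[y + 1] + (x + 1) * top_right;
 *       const int ver = (height - 1 - y) * ref_top[x + 1]  + (y + 1) * bottom_left;
 *       dst[y * width + x] = (uvg_pixel)((hor * height + ver * width + width * height)
 *                                         >> (log2_width + log2_height + 1));
 *     }
 *   }
 *
 * The half functions produce the unscaled hor/ver terms; the multiplies by
 * height/width and the final shift happen in the combine loop of
 * uvg_intra_pred_planar_avx2 further down.
 */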
+ const uint32_t* tmp = (const uint32_t*)&ref[1]; // Cast to 32 bit int to load 4 refs at the same time + const __m256i v_ref = _mm256_set1_epi32(*tmp); + + const __m256i* v_ys = (const __m256i*)planar_avx2_ver_w4ys; + + // Table offset + int offset; + switch (line) { + case 64: offset = 0; break; + case 32: offset = 16; break; + case 16: offset = 24; break; + case 8: offset = 28; break; + case 4: offset = 30; break; + default: + assert(false && "Invalid height for width 4."); + break; + } + + // Handle 4 lines at a time + #define UNROLL_LOOP(num) \ + for (int y = 0, s = offset, d = 0; y < (num); y += 4, ++s, ++d) { \ + __m256i v_lo = _mm256_unpacklo_epi8(v_ref, v_last_ref); \ + dst[d] = _mm256_maddubs_epi16(v_lo, v_ys[s]); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_ver_w8(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi8(ref_side[line + 1]); + + // Got eight 8-bit samples, or 64 bits of data. Duplicate to fill a whole 256-bit vector. + const __m128i v_ref_raw = _mm_loadu_si128((const __m128i*)&ref[1]); + __m256i v_ref = _mm256_castsi128_si256(v_ref_raw); + v_ref = _mm256_inserti128_si256(v_ref, v_ref_raw, 1); + v_ref = _mm256_shuffle_epi32(v_ref, _MM_SHUFFLE(1, 1, 0, 0)); + + const __m256i* v_ys = (const __m256i*)planar_avx2_ver_w4ys; + + // Table offset + int offset; + switch (line) { + case 64: offset = 0; break; + case 32: offset = 16; break; + case 16: offset = 24; break; + case 8: offset = 28; break; + case 4: offset = 30; break; + case 2: offset = 31; break; + default: + assert(false && "Invalid height for width 8."); + break; + } + + // Handle 4 lines at a time + #define UNROLL_LOOP(num) \ + for (int y = 0, s = offset, d = 0; y < (num); y += 4, ++s, d += 2) { \ + __m256i v_lo = _mm256_unpacklo_epi8(v_ref, v_last_ref); \ + __m256i v_hi = _mm256_unpackhi_epi8(v_ref, v_last_ref); \ + \ + __m256i v_madd_lo = _mm256_maddubs_epi16(v_lo, v_ys[s]); \ + __m256i v_madd_hi = _mm256_maddubs_epi16(v_hi, v_ys[s]); \ + __m256i v_tmp0 = _mm256_permute2x128_si256(v_madd_lo, v_madd_hi, 0x20); \ + __m256i v_tmp1 = _mm256_permute2x128_si256(v_madd_lo, v_madd_hi, 0x31); \ + \ + dst[d + 0] = _mm256_permute4x64_epi64(v_tmp0, _MM_SHUFFLE(3, 1, 2, 0)); \ + dst[d + 1] = _mm256_permute4x64_epi64(v_tmp1, _MM_SHUFFLE(3, 1, 2, 0)); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_ver_w16(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi8(ref_side[line + 1]); + + // Got 16 8-bit samples, or 128 bits of data. Duplicate to fill a whole 256-bit vector. 
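/*
 * The vertical halves lean on _mm256_maddubs_epi16: the top-row bytes are
 * interleaved with the bottom-left sample and multiplied by per-line weight
 * pairs from the precomputed ys tables. Per 16-bit result lane this is
 * (a sketch; the table layout is inferred from the usage below):
 *
 *   ver[y][x] = (height - 1 - y) * ref_top[x + 1] + (y + 1) * ref_left[height + 1];
 *
 * i.e. each lane of the table holds the byte pair { height - 1 - y, y + 1 }.
 */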
+ const __m128i v_ref_raw = _mm_loadu_si128((const __m128i*) &ref[1]); + __m256i v_ref = _mm256_castsi128_si256(v_ref_raw); + v_ref = _mm256_inserti128_si256(v_ref, v_ref_raw, 1); + + const __m256i* v_ys = (const __m256i*)planar_avx2_ver_w8ys; + + // Table offset + int offset; + switch (line) { + case 64: offset = 0; break; + case 32: offset = 32; break; + case 16: offset = 48; break; + case 8: offset = 56; break; + case 4: offset = 60; break; + case 2: offset = 62; break; + case 1: offset = 64; break; + default: + assert(false && "Invalid height for width 16."); + break; + } + + // Calculations for cases where line > 2 + // These stay constant through the loop + const __m256i v_lo = _mm256_unpacklo_epi8(v_ref, v_last_ref); + const __m256i v_hi = _mm256_unpackhi_epi8(v_ref, v_last_ref); + + // Handle 2 lines at a time + #define UNROLL_LOOP(num) \ + for (int y = 0, s = offset; y < (num); y += 2, ++s) { \ + __m256i v_madd_lo = _mm256_maddubs_epi16(v_lo, v_ys[s]); \ + __m256i v_madd_hi = _mm256_maddubs_epi16(v_hi, v_ys[s]); \ + dst[y + 0] = _mm256_permute2x128_si256(v_madd_lo, v_madd_hi, 0x20); \ + dst[y + 1] = _mm256_permute2x128_si256(v_madd_lo, v_madd_hi, 0x31); \ + } + + __m256i v_tmp; + switch (line) { + case 1: + // Specialized calculation for line == 1 + v_tmp = _mm256_permute2x128_si256(v_lo, v_hi, 0x20); + dst[0] = _mm256_maddubs_epi16(v_tmp, v_ys[offset + 0]); + break; + case 2: + // Specialized calculation for line == 2 + v_tmp = _mm256_permute2x128_si256(v_lo, v_hi, 0x20); + dst[0] = _mm256_maddubs_epi16(v_tmp, v_ys[offset + 0]); + dst[1] = _mm256_maddubs_epi16(v_tmp, v_ys[offset + 1]); + break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } +#undef UNROLL_LOOP +} +static void intra_pred_planar_ver_w32(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi8(ref_side[line + 1]); + + // Got 32 8-bit samples, or 256 bits of data. Load into a single vector + const __m256i v_ref = _mm256_loadu_si256((const __m256i*) &ref[1]); + + // These stay constant through the loop + const __m256i v_lo = _mm256_unpacklo_epi8(v_ref, v_last_ref); + const __m256i v_hi = _mm256_unpackhi_epi8(v_ref, v_last_ref); + + #define UNROLL_LOOP(num) \ + for (uint8_t y = 0, a = (num) - 1, b = 1, d = 0; y < (num); ++y, --a, ++b, d += 2) { \ + uint8_t tmp[2] = {a, b}; \ + uint16_t* tmp2 = (uint16_t*)tmp; \ + const __m256i v_ys = _mm256_set1_epi16(*tmp2); \ + \ + __m256i v_madd_lo = _mm256_maddubs_epi16(v_lo, v_ys); \ + __m256i v_madd_hi = _mm256_maddubs_epi16(v_hi, v_ys); \ + dst[d + 0] = _mm256_permute2x128_si256(v_madd_lo, v_madd_hi, 0x20); \ + dst[d + 1] = _mm256_permute2x128_si256(v_madd_lo, v_madd_hi, 0x31); \ + } + + switch (line) { + case 1: UNROLL_LOOP(1); break; + case 2: UNROLL_LOOP(2); break; + case 4: UNROLL_LOOP(4); break; + case 8: UNROLL_LOOP(8); break; + case 16: UNROLL_LOOP(16); break; + case 32: UNROLL_LOOP(32); break; + case 64: UNROLL_LOOP(64); break; + default: + assert(false && "Invalid dimension."); + break; + } + #undef UNROLL_LOOP +} +static void intra_pred_planar_ver_w64(const uvg_pixel* ref, const uvg_pixel* ref_side, const int line, const int shift, __m256i* dst) +{ + const __m256i v_last_ref = _mm256_set1_epi8(ref_side[line + 1]); + + // Got 64 8-bit samples, or 512 bits of data. 
Load into two vectors + const __m256i v_ref0 = _mm256_loadu_si256((const __m256i*) &ref[1]); + const __m256i v_ref1 = _mm256_loadu_si256((const __m256i*) &ref[33]); + + // These stay constant through the loop + const __m256i v_lo0 = _mm256_unpacklo_epi8(v_ref0, v_last_ref); + const __m256i v_lo1 = _mm256_unpacklo_epi8(v_ref1, v_last_ref); + const __m256i v_hi0 = _mm256_unpackhi_epi8(v_ref0, v_last_ref); + const __m256i v_hi1 = _mm256_unpackhi_epi8(v_ref1, v_last_ref); + + for (uint8_t y = 0, a = line - 1, b = 1, d = 0; y < line; ++y, --a, ++b, d += 4) { + uint8_t tmp[2] = {a, b}; + uint16_t* tmp2 = (uint16_t*)tmp; + const __m256i v_ys = _mm256_set1_epi16(*tmp2); + + __m256i v_madd_lo0 = _mm256_maddubs_epi16(v_lo0, v_ys); + __m256i v_madd_lo1 = _mm256_maddubs_epi16(v_lo1, v_ys); + __m256i v_madd_hi0 = _mm256_maddubs_epi16(v_hi0, v_ys); + __m256i v_madd_hi1 = _mm256_maddubs_epi16(v_hi1, v_ys); + + dst[d + 0] = _mm256_permute2x128_si256(v_madd_lo0, v_madd_hi0, 0x20); + dst[d + 1] = _mm256_permute2x128_si256(v_madd_lo0, v_madd_hi0, 0x31); + dst[d + 2] = _mm256_permute2x128_si256(v_madd_lo1, v_madd_hi1, 0x20); + dst[d + 3] = _mm256_permute2x128_si256(v_madd_lo1, v_madd_hi1, 0x31); + } +} + + +static intra_planar_half_func* planar_func_table[2][7] = { + { NULL, NULL, intra_pred_planar_hor_w4, intra_pred_planar_hor_w8, intra_pred_planar_hor_w16, intra_pred_planar_hor_w32, intra_pred_planar_hor_w64}, + { NULL, NULL, intra_pred_planar_ver_w4, intra_pred_planar_ver_w8, intra_pred_planar_ver_w16, intra_pred_planar_ver_w32, intra_pred_planar_ver_w64} +}; + + +static void uvg_intra_pred_planar_avx2(const cu_loc_t* const cu_loc, + color_t color, + const uint8_t* const ref_top, + const uint8_t* const ref_left, + uvg_pixel* dst) +{ + const int16_t width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; + const int16_t height = color == COLOR_Y ? 
cu_loc->height : cu_loc->chroma_height; + const int samples = width * height; + const __m256i v_samples = _mm256_set1_epi32(samples); + + const int log2_width = uvg_g_convert_to_log2[width]; + const int log2_height = uvg_g_convert_to_log2[height]; + const int shift_r = log2_width + log2_height + 1; + + __m256i v_pred_hor[256]; + __m256i v_pred_ver[256]; + + intra_planar_half_func* planar_hor = planar_func_table[0][log2_width]; + intra_planar_half_func* planar_ver = planar_func_table[1][log2_width]; + + planar_hor(ref_left, ref_top, height, log2_height, v_pred_hor); + planar_ver(ref_top, ref_left, height, log2_width, v_pred_ver); + + // Pack the two 16-bit combine weights (height for hor, width for ver) into one 32-bit lane and broadcast + int16_t tmp[2] = {height, width}; + int32_t* tmp2 = (int32_t*)tmp; + const __m256i v_madd_shift = _mm256_set1_epi32(*tmp2); + + __m256i v_res[256]; + // Old loop + /*for (int i = 0, d = 0; i < samples; i += 16, ++d) { + v_res[d] = _mm256_add_epi16(v_pred_ver[d], v_pred_hor[d]); + v_res[d] = _mm256_add_epi16(v_res[d], v_samples); + v_res[d] = _mm256_srli_epi16(v_res[d], shift_r); + }*/ + + // New loop + __m128i shift_r_v = _mm_setzero_si128(); + shift_r_v = _mm_insert_epi32(shift_r_v, shift_r, 0); + for (int i = 0, d = 0; i < samples; i += 16, ++d) { + __m256i v_lo = _mm256_unpacklo_epi16(v_pred_hor[d], v_pred_ver[d]); + __m256i v_hi = _mm256_unpackhi_epi16(v_pred_hor[d], v_pred_ver[d]); + + // madd will extend the intermediate results to 32-bit to avoid overflows + __m256i v_madd_lo = _mm256_madd_epi16(v_lo, v_madd_shift); + __m256i v_madd_hi = _mm256_madd_epi16(v_hi, v_madd_shift); + + v_madd_lo = _mm256_add_epi32(v_madd_lo, v_samples); + v_madd_hi = _mm256_add_epi32(v_madd_hi, v_samples); + + v_madd_lo = _mm256_srl_epi32(v_madd_lo, shift_r_v); + v_madd_hi = _mm256_srl_epi32(v_madd_hi, shift_r_v); + + v_res[d] = _mm256_packs_epi32(v_madd_lo, v_madd_hi); + } + + if (samples == 16) { + __m256i v_tmp = _mm256_packus_epi16(v_res[0], v_res[0]); + v_tmp = _mm256_permute4x64_epi64(v_tmp, _MM_SHUFFLE(3, 1, 2, 0)); + __m128i v_tmp2 = _mm256_castsi256_si128(v_tmp); + _mm_store_si128((__m128i*)dst, v_tmp2); + } + else { + for (int i = 0, s = 0; i < samples; i += 32, s += 2) { + __m256i v_tmp = _mm256_packus_epi16(v_res[s + 0], v_res[s + 1]); + v_tmp = _mm256_permute4x64_epi64(v_tmp, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm256_storeu_si256((__m256i*)&dst[i], v_tmp); + } + } +} + + +/** +* \brief Position Dependent Prediction Combination for Planar and DC modes. +* \param mode Intra mode, 0 for planar, 1 for DC. +* \param cu_loc Pointer to the CU location information. +* \param color Color component. +* \param used_ref Pointer to the used reference pixels. +* \param dst Buffer of size MAX_PRED_WIDTH * MAX_PRED_WIDTH. +*/ +// TODO: allegedly does not work with blocks with height 1 and 2. Test this. +// TODO: or just rework the whole thing. We might be able to optimize this further. +static void uvg_pdpc_planar_dc_avx2( + const int mode, + const cu_loc_t* const cu_loc, + const color_t color, + const uvg_intra_ref *const used_ref, + uvg_pixel *const dst) +{ + assert(mode == 0 || mode == 1); // planar or DC + const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; + const int height = color == COLOR_Y ? 
cu_loc->height : cu_loc->chroma_height; + const int log2_width = uvg_g_convert_to_log2[width]; + const int log2_height = uvg_g_convert_to_log2[height]; + + __m256i shuf_mask_byte = _mm256_setr_epi8( + 0, -1, 0, -1, 0, -1, 0, -1, + 1, -1, 1, -1, 1, -1, 1, -1, + 2, -1, 2, -1, 2, -1, 2, -1, + 3, -1, 3, -1, 3, -1, 3, -1 + ); + + __m256i shuf_mask_word = _mm256_setr_epi8( + 0, 1, 0, 1, 0, 1, 0, 1, + 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, + 6, 7, 6, 7, 6, 7, 6, 7 + ); + + const int scale = ((log2_width - 2 + log2_height - 2 + 2) >> 2); + + // Same weights regardless of axis, compute once + int16_t w[LCU_WIDTH]; + for (int i = 0; i < MAX(width, height); i += 4) { + __m128i base = _mm_set1_epi32(i); + __m128i offs = _mm_setr_epi32(0, 1, 2, 3); + __m128i idxs = _mm_add_epi32(base, offs); + __m128i unclipped = _mm_slli_epi32(idxs, 1); + unclipped = _mm_srli_epi32(unclipped, scale); + __m128i clipped = _mm_min_epi32( _mm_set1_epi32(31), unclipped); + __m128i weights = _mm_srlv_epi32(_mm_set1_epi32(32), clipped); + weights = _mm_packus_epi32(weights, weights); + _mm_storel_epi64((__m128i*)&w[i], weights); + } + + // Process in 4x4 blocks + for (int y = 0; y < height; y += 4) { + for (int x = 0; x < width; x += 4) { + + uint32_t dw_left; + uint32_t dw_top; + memcpy(&dw_left, &used_ref->left[y + 1], sizeof(dw_left)); + memcpy(&dw_top , &used_ref->top [x + 1], sizeof(dw_top)); + __m256i vleft = _mm256_set1_epi32(dw_left); + __m256i vtop = _mm256_set1_epi32(dw_top); + vleft = _mm256_shuffle_epi8(vleft, shuf_mask_byte); + vtop = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(vtop)); + + __m128i vseq = _mm_setr_epi32(0, 1, 2, 3); + __m128i vidx = _mm_slli_epi32(vseq, log2_width); + __m128i vdst = _mm_i32gather_epi32((const int32_t*)(dst + y * width + x), vidx, 1); + __m256i vdst16 = _mm256_cvtepu8_epi16(vdst); + uint64_t quad_wL; + uint64_t quad_wT; + memcpy(&quad_wL, &w[x], sizeof(quad_wL)); + memcpy(&quad_wT, &w[y], sizeof(quad_wT)); + __m256i vwL = _mm256_set1_epi64x(quad_wL); + __m256i vwT = _mm256_set1_epi64x(quad_wT); + vwT = _mm256_shuffle_epi8(vwT, shuf_mask_word); + __m256i diff_left = _mm256_sub_epi16(vleft, vdst16); + __m256i diff_top = _mm256_sub_epi16(vtop , vdst16); + __m256i prod_left = _mm256_mullo_epi16(vwL, diff_left); + __m256i prod_top = _mm256_mullo_epi16(vwT, diff_top); + __m256i accu = _mm256_add_epi16(prod_left, prod_top); + accu = _mm256_add_epi16(accu, _mm256_set1_epi16(32)); + accu = _mm256_srai_epi16(accu, 6); + accu = _mm256_add_epi16(vdst16, accu); + + __m128i lo = _mm256_castsi256_si128(accu); + __m128i hi = _mm256_extracti128_si256(accu, 1); + vdst = _mm_packus_epi16(lo, hi); + + *(uint32_t*)(dst + (y + 0) * width + x) = _mm_extract_epi32(vdst, 0); + *(uint32_t*)(dst + (y + 1) * width + x) = _mm_extract_epi32(vdst, 1); + *(uint32_t*)(dst + (y + 2) * width + x) = _mm_extract_epi32(vdst, 2); + *(uint32_t*)(dst + (y + 3) * width + x) = _mm_extract_epi32(vdst, 3); + } + } +} + +static INLINE void mip_ref_downsampling_4x4_4to2_avx2(uvg_pixel* reduced_dst, const uvg_pixel* const ref_top, const uvg_pixel* const ref_left) +{ + const uint8_t down_smp_factor = 2; // width / red_bdry_size + const int log2_factor = uvg_g_convert_to_log2[down_smp_factor]; + const int rounding_offset = (1 << (log2_factor - 1)); + + const __m128i vrnd = _mm_set1_epi16(rounding_offset); + + ALIGNED(16) uint32_t ref[4]; + ref[0] = *(uint32_t*)ref_top; + ref[1] = *(uint32_t*)ref_left; + + __m128i vref = _mm_load_si128((__m128i*)ref); + vref = _mm_cvtepu8_epi16(vref); + + __m128i vres = 
_mm_hadd_epi16(vref, vref); + + vres = _mm_add_epi16(vres, vrnd); + vres = _mm_srli_epi16(vres, log2_factor); + __m128i vout = _mm_packus_epi16(vres, vres); + + *(uint32_t*)reduced_dst = _mm_extract_epi32(vout, 0); +} + +static INLINE void mip_ref_downsampling_8x8_8to4_avx2(uvg_pixel* reduced_dst, const uvg_pixel* const ref_top, const uvg_pixel* const ref_left) +{ + const uint8_t down_smp_factor = 2; // width / red_bdry_size + const int log2_factor = uvg_g_convert_to_log2[down_smp_factor]; + const int rounding_offset = (1 << (log2_factor - 1)); + + const __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + ALIGNED(16) uint64_t ref[2]; + ref[0] = *(uint64_t*)ref_top; + ref[1] = *(uint64_t*)ref_left; + + __m128i vref = _mm_load_si128((__m128i*)ref); + __m256i vref256 = _mm256_cvtepu8_epi16(vref); + + __m256i vres = _mm256_hadd_epi16(vref256, vref256); + vres = _mm256_permute4x64_epi64(vres, _MM_SHUFFLE(3, 1, 2, 0)); + + vres = _mm256_add_epi16(vres, vrnd); + vres = _mm256_srli_epi16(vres, log2_factor); + __m256i vout = _mm256_packus_epi16(vres, vres); + + *(uint64_t*)reduced_dst = _mm256_extract_epi64(vout, 0); +} + +static INLINE void mip_ref_downsampling_1D_8to4_avx2(uvg_pixel* reduced_dst, const uvg_pixel* const ref_src) +{ + const uint8_t down_smp_factor = 2; // width / red_bdry_size + const int log2_factor = uvg_g_convert_to_log2[down_smp_factor]; + const int rounding_offset = (1 << (log2_factor - 1)); + + const __m128i vrnd = _mm_set1_epi16(rounding_offset); + + __m128i vref = _mm_loadu_si128((__m128i*)ref_src); // Half the data is garbage and will be ignored. + vref = _mm_cvtepu8_epi16(vref); + __m128i vres = _mm_hadd_epi16(vref, vref); + vres = _mm_add_epi16(vres, vrnd); + vres = _mm_srli_epi16(vres, log2_factor); + __m128i vout = _mm_packus_epi16(vres, vres); + + *(int32_t*)reduced_dst = _mm_extract_epi32(vout, 0); +} + +static INLINE void mip_ref_downsampling_1D_16to4_avx2(uvg_pixel* reduced_dst, const uvg_pixel* const ref_src) +{ + const uint8_t down_smp_factor = 4; // width / red_bdry_size + const int log2_factor = uvg_g_convert_to_log2[down_smp_factor]; + const int rounding_offset = (1 << (log2_factor - 1)); + + const __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + // TODO: try _mm256_dpbuud. + // NOTE: ignore this TODO for now, using dpbuud causes error 0xC000001D: Illegal Instruction. + // The instruction requires a newer CPU. 
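/*
 * All of the mip_ref_downsampling_* kernels compute the same reduction:
 * average each aligned run of (1 << log2_factor) boundary samples with
 * round-to-nearest. A scalar sketch of one output sample i:
 *
 *   int sum = 0;
 *   for (int k = 0; k < (1 << log2_factor); ++k) {
 *     sum += ref_src[(i << log2_factor) + k];
 *   }
 *   reduced_dst[i] = (uvg_pixel)((sum + (1 << (log2_factor - 1))) >> log2_factor);
 *
 * The hadd/permute chains are this summation tree in vector form.
 */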
+ __m128i vref = _mm_loadu_si128((__m128i*)ref_src); + __m256i vref256 = _mm256_cvtepu8_epi16(vref); + __m256i vres = _mm256_hadd_epi16(vref256, vref256); + vres = _mm256_permute4x64_epi64(vres, _MM_SHUFFLE(3, 1, 2, 0)); + vres = _mm256_hadd_epi16(vres, vres); + vres = _mm256_add_epi16(vres, vrnd); + vres = _mm256_srli_epi16(vres, log2_factor); + __m256i vout = _mm256_packus_epi16(vres, vres); + + *(int32_t*)(reduced_dst + 0) = _mm256_extract_epi32(vout, 0); +} + +static INLINE void mip_ref_downsampling_1D_32to4_avx2(uvg_pixel* reduced_dst, const uvg_pixel* const ref_src) +{ + const uint8_t down_smp_factor = 8; // width / red_bdry_size + const int log2_factor = uvg_g_convert_to_log2[down_smp_factor]; + const int rounding_offset = (1 << (log2_factor - 1)); + + const __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + __m128i vrefa = _mm_loadu_si128((__m128i*)(ref_src + 0)); + __m128i vrefb = _mm_loadu_si128((__m128i*)(ref_src + 16)); + + __m256i vref256a = _mm256_cvtepu8_epi16(vrefa); + __m256i vref256b = _mm256_cvtepu8_epi16(vrefb); + + __m256i vres = _mm256_hadd_epi16(vref256a, vref256b); + vres = _mm256_permute4x64_epi64(vres, _MM_SHUFFLE(3, 1, 2, 0)); + vres = _mm256_hadd_epi16(vres, vres); + vres = _mm256_permute4x64_epi64(vres, _MM_SHUFFLE(3, 1, 2, 0)); + vres = _mm256_hadd_epi16(vres, vres); + + vres = _mm256_add_epi16(vres, vrnd); + vres = _mm256_srli_epi16(vres, log2_factor); + __m256i vout = _mm256_packus_epi16(vres, vres); + + *(int32_t*)(reduced_dst + 0) = _mm256_extract_epi32(vout, 0); +} + +static INLINE void mip_ref_downsampling_1D_64to4_avx2(uvg_pixel* reduced_dst, const uvg_pixel* const ref_src) +{ + const uint8_t down_smp_factor = 16; // width / red_bdry_size + const int log2_factor = uvg_g_convert_to_log2[down_smp_factor]; + const int rounding_offset = (1 << (log2_factor - 1)); + + const __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + __m128i vrefa = _mm_loadu_si128((__m128i*)(ref_src + 0)); + __m128i vrefb = _mm_loadu_si128((__m128i*)(ref_src + 16)); + __m128i vrefc = _mm_loadu_si128((__m128i*)(ref_src + 32)); + __m128i vrefd = _mm_loadu_si128((__m128i*)(ref_src + 48)); + + __m256i vref256a = _mm256_cvtepu8_epi16(vrefa); + __m256i vref256b = _mm256_cvtepu8_epi16(vrefb); + __m256i vref256c = _mm256_cvtepu8_epi16(vrefc); + __m256i vref256d = _mm256_cvtepu8_epi16(vrefd); + + + __m256i vres0 = _mm256_hadd_epi16(vref256a, vref256b); + __m256i vres1 = _mm256_hadd_epi16(vref256c, vref256d); + vres0 = _mm256_permute4x64_epi64(vres0, _MM_SHUFFLE(3, 1, 2, 0)); + vres1 = _mm256_permute4x64_epi64(vres1, _MM_SHUFFLE(3, 1, 2, 0)); + + vres0 = _mm256_hadd_epi16(vres0, vres1); + vres0 = _mm256_permute4x64_epi64(vres0, _MM_SHUFFLE(3, 1, 2, 0)); + + vres0 = _mm256_hadd_epi16(vres0, vres0); + vres0 = _mm256_permute4x64_epi64(vres0, _MM_SHUFFLE(3, 1, 2, 0)); + + vres0 = _mm256_hadd_epi16(vres0, vres0); + + vres0 = _mm256_add_epi16(vres0, vrnd); + vres0 = _mm256_srli_epi16(vres0, log2_factor); + __m256i vout = _mm256_packus_epi16(vres0, vres0); + + *(int32_t*)(reduced_dst + 0) = _mm256_extract_epi32(vout, 0); + //*(int32_t*)(reduced_dst + 2) = _mm_extract_epi16(vout, 8); +} + + +// Size ID 0 +static INLINE void mip_reduced_pred_sid0_avx2(uvg_pixel* const output, + const int16_t* const input, + const uint16_t* matrix, + const bool transpose, + const int in_offset, + const int in_offset_tr) +{ + const int input_size = 4; + // const int pred_size = 4; + // const int size_id = 0; + + int sum = 0; + for (int i = 0; i < input_size; i++) { + sum += input[i]; + } + const int offset = (1 << 
(MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; + + const __m128i vofs = _mm_set1_epi32(offset); + + const uint16_t* weight = matrix; + const int input_offset = transpose ? in_offset_tr : in_offset; + + const __m128i vinofs = _mm_set1_epi32(input_offset); + + const __m128i vshuf = _mm_setr_epi8( + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + ); + + const __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + const __m128i vinraw = _mm_loadu_si128((__m128i*)input); + const __m128i vin = _mm_shuffle_epi8(vinraw, vshuf); + + // Calculate first half + __m128i vweight0 = _mm_loadu_si128((__m128i*) &weight[0]); + __m128i vweight1 = _mm_loadu_si128((__m128i*) &weight[8]); + __m128i vweight2 = _mm_loadu_si128((__m128i*) &weight[16]); + __m128i vweight3 = _mm_loadu_si128((__m128i*) &weight[24]); + + weight += 32; + + __m128i vmadd0 = _mm_madd_epi16(vin, vweight0); + __m128i vmadd1 = _mm_madd_epi16(vin, vweight1); + __m128i vmadd2 = _mm_madd_epi16(vin, vweight2); + __m128i vmadd3 = _mm_madd_epi16(vin, vweight3); + + __m128i vresult0 = _mm_hadd_epi32(vmadd0, vmadd1); + __m128i vresult1 = _mm_hadd_epi32(vmadd2, vmadd3); + + vresult0 = _mm_add_epi32(vresult0, vofs); + vresult0 = _mm_srai_epi32(vresult0, MIP_SHIFT_MATRIX); + vresult0 = _mm_add_epi32(vresult0, vinofs); + + vresult1 = _mm_add_epi32(vresult1, vofs); + vresult1 = _mm_srai_epi32(vresult1, MIP_SHIFT_MATRIX); + vresult1 = _mm_add_epi32(vresult1, vinofs); + + __m128i vres16_a = _mm_packus_epi32(vresult0, vresult1); + + // Calculate second half + vweight0 = _mm_loadu_si128((__m128i*) & weight[0]); + vweight1 = _mm_loadu_si128((__m128i*) & weight[8]); + vweight2 = _mm_loadu_si128((__m128i*) & weight[16]); + vweight3 = _mm_loadu_si128((__m128i*) & weight[24]); + + vmadd0 = _mm_madd_epi16(vin, vweight0); + vmadd1 = _mm_madd_epi16(vin, vweight1); + vmadd2 = _mm_madd_epi16(vin, vweight2); + vmadd3 = _mm_madd_epi16(vin, vweight3); + + vresult0 = _mm_hadd_epi32(vmadd0, vmadd1); + vresult1 = _mm_hadd_epi32(vmadd2, vmadd3); + + vresult0 = _mm_add_epi32(vresult0, vofs); + vresult0 = _mm_srai_epi32(vresult0, MIP_SHIFT_MATRIX); + vresult0 = _mm_add_epi32(vresult0, vinofs); + + vresult1 = _mm_add_epi32(vresult1, vofs); + vresult1 = _mm_srai_epi32(vresult1, MIP_SHIFT_MATRIX); + vresult1 = _mm_add_epi32(vresult1, vinofs); + + __m128i vres16_b = _mm_packus_epi32(vresult0, vresult1); + __m128i vres8 = _mm_packus_epi16(vres16_a, vres16_b); + + if (transpose) { + vres8 = _mm_shuffle_epi8(vres8, vtranspose); + _mm_storeu_si128((__m128i*)output, vres8); + } + else { + _mm_storeu_si128((__m128i*)output, vres8); + } +} + +// Size ID 1 +static void INLINE mip_reduced_pred_sid1_avx2(uvg_pixel* const output, + const int16_t* const input, + const uint16_t* matrix, + const bool transpose, + const int in_offset, + const int in_offset_tr) +{ + const int input_size = 8; + const int pred_size = 4; + const int size_id = 1; + + int sum = 0; + for (int i = 0; i < input_size; i++) { + sum += input[i]; + } + const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; + + const __m128i vofs = _mm_set1_epi32(offset); + + const uint16_t* weight = matrix; + const int input_offset = transpose ? 
in_offset_tr : in_offset; + + const __m128i vinofs = _mm_set1_epi32(input_offset); + + const __m128i vshuf0 = _mm_setr_epi8( + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03); + const __m128i vshuf1 = _mm_setr_epi8( + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07); + const __m128i vshuf2 = _mm_setr_epi8( + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b); + const __m128i vshuf3 = _mm_setr_epi8( + 0x0c, 0x0d, 0x0e, 0x0f, 0x0c, 0x0d, 0x0e, 0x0f, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0c, 0x0d, 0x0e, 0x0f); + const __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, + 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f + ); + + const __m128i vinraw = _mm_loadu_si128((__m128i*)input); + + const __m128i vin0 = _mm_shuffle_epi8(vinraw, vshuf0); + const __m128i vin1 = _mm_shuffle_epi8(vinraw, vshuf1); + const __m128i vin2 = _mm_shuffle_epi8(vinraw, vshuf2); + const __m128i vin3 = _mm_shuffle_epi8(vinraw, vshuf3); + + + // Calculate row 1, first 4 + __m128i vweight0 = _mm_loadu_si128((__m128i*)&weight[0]); + __m128i vweight1 = _mm_loadu_si128((__m128i*)&weight[8]); + __m128i vweight2 = _mm_loadu_si128((__m128i*)&weight[16]); + __m128i vweight3 = _mm_loadu_si128((__m128i*)&weight[24]); + __m128i vmadd0 = _mm_madd_epi16(vin0, vweight0); + __m128i vmadd1 = _mm_madd_epi16(vin1, vweight1); + __m128i vmadd2 = _mm_madd_epi16(vin2, vweight2); + __m128i vmadd3 = _mm_madd_epi16(vin3, vweight3); + __m128i vadd0 = _mm_add_epi32(vmadd0, vmadd1); + __m128i vadd1 = _mm_add_epi32(vmadd2, vmadd3); + __m128i result0 = _mm_add_epi32(vadd0, vadd1); + result0 = _mm_add_epi32(result0, vofs); + result0 = _mm_srai_epi32(result0, MIP_SHIFT_MATRIX); + result0 = _mm_add_epi32(result0, vinofs); + + weight += input_size * 4; + + // Calculate row 1, last 4 + vweight0 = _mm_loadu_si128((__m128i*)&weight[0]); + vweight1 = _mm_loadu_si128((__m128i*)&weight[8]); + vweight2 = _mm_loadu_si128((__m128i*)&weight[16]); + vweight3 = _mm_loadu_si128((__m128i*)&weight[24]); + vmadd0 = _mm_madd_epi16(vin0, vweight0); + vmadd1 = _mm_madd_epi16(vin1, vweight1); + vmadd2 = _mm_madd_epi16(vin2, vweight2); + vmadd3 = _mm_madd_epi16(vin3, vweight3); + vadd0 = _mm_add_epi32(vmadd0, vmadd1); + vadd1 = _mm_add_epi32(vmadd2, vmadd3); + __m128i result1 = _mm_add_epi32(vadd0, vadd1); + result1 = _mm_add_epi32(result1, vofs); + result1 = _mm_srai_epi32(result1, MIP_SHIFT_MATRIX); + result1 = _mm_add_epi32(result1, vinofs); + + __m128i vres16_a = _mm_packus_epi32(result0, result1); + + + weight += input_size * 4; + // Calculate row 2, first 4 + vweight0 = _mm_loadu_si128((__m128i*)&weight[0]); + vweight1 = _mm_loadu_si128((__m128i*)&weight[8]); + vweight2 = _mm_loadu_si128((__m128i*)&weight[16]); + vweight3 = _mm_loadu_si128((__m128i*)&weight[24]); + + vmadd0 = _mm_madd_epi16(vin0, vweight0); + vmadd1 = _mm_madd_epi16(vin1, vweight1); + vmadd2 = _mm_madd_epi16(vin2, vweight2); + vmadd3 = _mm_madd_epi16(vin3, vweight3); + + vadd0 = _mm_add_epi32(vmadd0, vmadd1); + vadd1 = _mm_add_epi32(vmadd2, vmadd3); + + result0 = _mm_add_epi32(vadd0, vadd1); + + result0 = _mm_add_epi32(result0, vofs); + result0 = _mm_srai_epi32(result0, MIP_SHIFT_MATRIX); + result0 = _mm_add_epi32(result0, vinofs); + + weight += input_size * 4; + // Calculate row 2, last 4 + vweight0 = _mm_loadu_si128((__m128i*)&weight[0]); + vweight1 = _mm_loadu_si128((__m128i*)&weight[8]); + vweight2 = 
_mm_loadu_si128((__m128i*)&weight[16]); + vweight3 = _mm_loadu_si128((__m128i*)&weight[24]); + vmadd0 = _mm_madd_epi16(vin0, vweight0); + vmadd1 = _mm_madd_epi16(vin1, vweight1); + vmadd2 = _mm_madd_epi16(vin2, vweight2); + vmadd3 = _mm_madd_epi16(vin3, vweight3); + vadd0 = _mm_add_epi32(vmadd0, vmadd1); + vadd1 = _mm_add_epi32(vmadd2, vmadd3); + result1 = _mm_add_epi32(vadd0, vadd1); + result1 = _mm_add_epi32(result1, vofs); + result1 = _mm_srai_epi32(result1, MIP_SHIFT_MATRIX); + result1 = _mm_add_epi32(result1, vinofs); + __m128i vres16_b = _mm_packus_epi32(result0, result1); + __m128i vres8 = _mm_packus_epi16(vres16_a, vres16_b); + if (transpose) { + vres8 = _mm_shuffle_epi8(vres8, vtranspose); + _mm_storeu_si128((__m128i*)output, vres8); + + } else { + _mm_storeu_si128((__m128i*)output, vres8); + } + /*if (transpose) { + for (int y = 0; y < pred_size; y++) { + for (int x = 0; x < pred_size; x++) { + output[y * pred_size + x] = out_ptr[x * pred_size + y]; + } + } + }*/ +} + +// Size ID 2 +static void INLINE mip_reduced_pred_sid2_avx2(uvg_pixel* const output, + const int16_t* const input, + const uint16_t* matrix, + const bool transpose, + const int in_offset, + const int in_offset_tr) +{ + const int input_size = 8; + const int pred_size = 8; + const int size_id = 2; + + uvg_pixel * out_ptr = output; + + int sum = 0; + for (int i = 0; i < input_size; i++) { + sum += input[i]; + } + const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; + + const __m128i vofs = _mm_set1_epi32(offset); + + const uint16_t* weight = matrix; + const int input_offset = transpose ? in_offset_tr : in_offset; + + const __m128i vinofs = _mm_set1_epi32(input_offset); + + const __m128i vshuf0 = _mm_setr_epi8( + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03); + const __m128i vshuf1 = _mm_setr_epi8( + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07); + const __m128i vshuf2 = _mm_setr_epi8( + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b); + const __m128i vshuf3 = _mm_setr_epi8( + 0x0c, 0x0d, 0x0e, 0x0f, 0x0c, 0x0d, 0x0e, 0x0f, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0c, 0x0d, 0x0e, 0x0f); + + const __m128i vinraw = _mm_loadu_si128((__m128i*)input); + + const __m128i vin0 = _mm_shuffle_epi8(vinraw, vshuf0); + const __m128i vin1 = _mm_shuffle_epi8(vinraw, vshuf1); + const __m128i vin2 = _mm_shuffle_epi8(vinraw, vshuf2); + const __m128i vin3 = _mm_shuffle_epi8(vinraw, vshuf3); + __m128i vtranspose = _mm_setr_epi8( + 0x00, 0x08, 0x01, 0x09, 0x02, 0x0a, 0x03, 0x0b, + 0x04, 0x0c, 0x05, 0x0d, 0x06, 0x0e, 0x07, 0x0f + ); + + __m128i vtmpres[4]; + + for (int y = 0, tmp = 0; y < pred_size; y += 2, ++tmp) { + // Calculate row 1, first 4 + __m128i vweight0 = _mm_loadu_si128((__m128i*) &weight[0]); + __m128i vweight1 = _mm_loadu_si128((__m128i*) &weight[8]); + __m128i vweight2 = _mm_loadu_si128((__m128i*) &weight[16]); + __m128i vweight3 = _mm_loadu_si128((__m128i*) &weight[24]); + + __m128i vmadd0 = _mm_madd_epi16(vin0, vweight0); + __m128i vmadd1 = _mm_madd_epi16(vin1, vweight1); + __m128i vmadd2 = _mm_madd_epi16(vin2, vweight2); + __m128i vmadd3 = _mm_madd_epi16(vin3, vweight3); + + __m128i vadd0 = _mm_add_epi32(vmadd0, vmadd1); + __m128i vadd1 = _mm_add_epi32(vmadd2, vmadd3); + + __m128i result0 = _mm_add_epi32(vadd0, vadd1); + + result0 = _mm_add_epi32(result0, vofs); + result0 = _mm_srai_epi32(result0, MIP_SHIFT_MATRIX); + result0 = _mm_add_epi32(result0, vinofs); + 
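/*
 * Each vres16_* block is four samples of the MIP matrix product. Per output
 * sample, the sid0/sid1/sid2 kernels evaluate (scalar sketch; the final
 * packus provides the clamp to the 8-bit pixel range):
 *
 *   int acc = 0;
 *   for (int i = 0; i < input_size; ++i) {
 *     acc += input[i] * weight[i];
 *   }
 *   out = CLIP_TO_PIXEL(((acc + offset) >> MIP_SHIFT_MATRIX) + input_offset);
 *
 * where offset already folds in the rounding term and
 * -MIP_OFFSET_MATRIX * sum(input), as computed at the top of each kernel.
 */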
+ weight += input_size * 4; + + // Calculate row 1, last 4 + vweight0 = _mm_loadu_si128((__m128i*) &weight[0]); + vweight1 = _mm_loadu_si128((__m128i*) &weight[8]); + vweight2 = _mm_loadu_si128((__m128i*) &weight[16]); + vweight3 = _mm_loadu_si128((__m128i*) &weight[24]); + + vmadd0 = _mm_madd_epi16(vin0, vweight0); + vmadd1 = _mm_madd_epi16(vin1, vweight1); + vmadd2 = _mm_madd_epi16(vin2, vweight2); + vmadd3 = _mm_madd_epi16(vin3, vweight3); + + vadd0 = _mm_add_epi32(vmadd0, vmadd1); + vadd1 = _mm_add_epi32(vmadd2, vmadd3); + + __m128i result1 = _mm_add_epi32(vadd0, vadd1); + + result1 = _mm_add_epi32(result1, vofs); + result1 = _mm_srai_epi32(result1, MIP_SHIFT_MATRIX); + result1 = _mm_add_epi32(result1, vinofs); + + __m128i vres16_a = _mm_packus_epi32(result0, result1); + + weight += input_size * 4; + + // Calculate row 2, first 4 + vweight0 = _mm_loadu_si128((__m128i*) &weight[0]); + vweight1 = _mm_loadu_si128((__m128i*) &weight[8]); + vweight2 = _mm_loadu_si128((__m128i*) &weight[16]); + vweight3 = _mm_loadu_si128((__m128i*) &weight[24]); + + vmadd0 = _mm_madd_epi16(vin0, vweight0); + vmadd1 = _mm_madd_epi16(vin1, vweight1); + vmadd2 = _mm_madd_epi16(vin2, vweight2); + vmadd3 = _mm_madd_epi16(vin3, vweight3); + + vadd0 = _mm_add_epi32(vmadd0, vmadd1); + vadd1 = _mm_add_epi32(vmadd2, vmadd3); + + result0 = _mm_add_epi32(vadd0, vadd1); + + result0 = _mm_add_epi32(result0, vofs); + result0 = _mm_srai_epi32(result0, MIP_SHIFT_MATRIX); + result0 = _mm_add_epi32(result0, vinofs); + + weight += input_size * 4; + + // Calculate row 2, last 4 + vweight0 = _mm_loadu_si128((__m128i*) &weight[0]); + vweight1 = _mm_loadu_si128((__m128i*) &weight[8]); + vweight2 = _mm_loadu_si128((__m128i*) &weight[16]); + vweight3 = _mm_loadu_si128((__m128i*) &weight[24]); + + vmadd0 = _mm_madd_epi16(vin0, vweight0); + vmadd1 = _mm_madd_epi16(vin1, vweight1); + vmadd2 = _mm_madd_epi16(vin2, vweight2); + vmadd3 = _mm_madd_epi16(vin3, vweight3); + + vadd0 = _mm_add_epi32(vmadd0, vmadd1); + vadd1 = _mm_add_epi32(vmadd2, vmadd3); + + result1 = _mm_add_epi32(vadd0, vadd1); + + result1 = _mm_add_epi32(result1, vofs); + result1 = _mm_srai_epi32(result1, MIP_SHIFT_MATRIX); + result1 = _mm_add_epi32(result1, vinofs); + + __m128i vres16_b = _mm_packus_epi32(result0, result1); + __m128i vres8 = _mm_packus_epi16(vres16_a, vres16_b); + + if (transpose) { + // Store into temporary storage, transpose later + vtmpres[tmp] = vres8; + } + else { + _mm_storeu_si128((__m128i*)out_ptr, vres8); + out_ptr += 16; + } + weight += input_size * 4; + } + + if (transpose) { + vtmpres[0] = _mm_shuffle_epi8(vtmpres[0], vtranspose); + vtmpres[1] = _mm_shuffle_epi8(vtmpres[1], vtranspose); + vtmpres[2] = _mm_shuffle_epi8(vtmpres[2], vtranspose); + vtmpres[3] = _mm_shuffle_epi8(vtmpres[3], vtranspose); + + __m128i v16lo0 = _mm_unpacklo_epi16(vtmpres[0], vtmpres[1]); + __m128i v16lo1 = _mm_unpacklo_epi16(vtmpres[2], vtmpres[3]); + __m128i v16hi0 = _mm_unpackhi_epi16(vtmpres[0], vtmpres[1]); + __m128i v16hi1 = _mm_unpackhi_epi16(vtmpres[2], vtmpres[3]); + + __m128i v32lo0 = _mm_unpacklo_epi32(v16lo0, v16lo1); + __m128i v32lo1 = _mm_unpacklo_epi32(v16hi0, v16hi1); + __m128i v32hi0 = _mm_unpackhi_epi32(v16lo0, v16lo1); + __m128i v32hi1 = _mm_unpackhi_epi32(v16hi0, v16hi1); + + /*__m128i vout0 = _mm_unpacklo_epi64(v32lo0, v32hi0); + __m128i vout1 = _mm_unpacklo_epi64(v32lo1, v32hi1); + __m128i vout2 = _mm_unpackhi_epi64(v32lo0, v32hi0); + __m128i vout3 = _mm_unpackhi_epi64(v32lo1, v32hi1);*/ + + _mm_store_si128((__m128i*)(output + 0), v32lo0); + 
_mm_store_si128((__m128i*)(output + 16), v32hi0); + _mm_store_si128((__m128i*)(output + 32), v32lo1); + _mm_store_si128((__m128i*)(output + 48), v32hi1); + } +} + + +// 8x8, size id 1 hor upscale params [4, 4, 1, 4, 1, 16, 2, 2] +static void mip_upsampling_w8_ups2_hor_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref, const uint16_t dst_step, const uint8_t ref_step) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 2; // width / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + + // Shuffles for result lines 0 and 1 + __m128i vshuf0 = _mm_setr_epi8( + 0xff, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, + 0xff, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 + ); + + __m128i vshuf1 = _mm_setr_epi8( + 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, + 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07 + ); + + // Shuffles for result lines 2 and 3 + __m128i vshuf2 = _mm_setr_epi8( + 0xff, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0xff, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f + ); + + __m128i vshuf3 = _mm_setr_epi8( + 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f + ); + + __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + uvg_pixel ref0 = *(ref + (ref_step * 1) - 1); + uvg_pixel ref1 = *(ref + (ref_step * 2) - 1); + uvg_pixel ref2 = *(ref + (ref_step * 3) - 1); + uvg_pixel ref3 = *(ref + (ref_step * 4) - 1); + + __m128i vsrc = _mm_loadu_si128((__m128i*)src); + + __m128i vadd0 = _mm_shuffle_epi8(vsrc, vshuf0); + __m128i vadd1 = _mm_shuffle_epi8(vsrc, vshuf1); + __m128i vadd2 = _mm_shuffle_epi8(vsrc, vshuf2); + __m128i vadd3 = _mm_shuffle_epi8(vsrc, vshuf3); + + vadd0 = _mm_insert_epi8(vadd0, ref0, 0x00); + vadd0 = _mm_insert_epi8(vadd0, ref1, 0x08); + vadd2 = _mm_insert_epi8(vadd2, ref2, 0x00); + vadd2 = _mm_insert_epi8(vadd2, ref3, 0x08); + + // Extend to 16-bit + __m256i vadd16_0 = _mm256_cvtepu8_epi16(vadd0); + __m256i vadd16_1 = _mm256_cvtepu8_epi16(vadd1); + __m256i vadd16_2 = _mm256_cvtepu8_epi16(vadd2); + __m256i vadd16_3 = _mm256_cvtepu8_epi16(vadd3); + + __m256i vtmp0 = _mm256_add_epi16(vadd16_0, vadd16_1); + __m256i vtmp1 = _mm256_add_epi16(vadd16_2, vadd16_3); + + vtmp0 = _mm256_add_epi16(vtmp0, vrnd); + vtmp1 = _mm256_add_epi16(vtmp1, vrnd); + + vtmp0 = _mm256_srli_epi16(vtmp0, log2_factor); + vtmp1 = _mm256_srli_epi16(vtmp1, log2_factor); + + __m256i vres = _mm256_packus_epi16(vtmp0, vtmp1); + vres = _mm256_permute4x64_epi64(vres, _MM_SHUFFLE(3, 1, 2, 0)); + + // Dst step is never 8, since this is only called for 8x8 blocks + *(uint64_t*)&dst[dst_step * 0] = _mm256_extract_epi64(vres, 0); + *(uint64_t*)&dst[dst_step * 1] = _mm256_extract_epi64(vres, 1); + *(uint64_t*)&dst[dst_step * 2] = _mm256_extract_epi64(vres, 2); + *(uint64_t*)&dst[dst_step * 3] = _mm256_extract_epi64(vres, 3); + + /*if (dst_step == 8) { + _mm256_storeu_si256((__m256i*)dst, vres); + } + else { + *(uint64_t*)&dst[dst_step * 0] = _mm256_extract_epi64(vres, 0); + *(uint64_t*)&dst[dst_step * 1] = _mm256_extract_epi64(vres, 1); + *(uint64_t*)&dst[dst_step * 2] = _mm256_extract_epi64(vres, 2); + *(uint64_t*)&dst[dst_step * 3] = _mm256_extract_epi64(vres, 3); + }*/ +} + + +static void mip_upsampling_w16_ups2_hor_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref, const uint16_t dst_step, const uint8_t ref_step) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 2; // width / red_pred_size + + const 
int log2_factor = uvg_g_convert_to_log2[ups_factor]; + + const uvg_pixel* ref_ptr = ref + ref_step - 1; + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + for (int i = 0; i < 2; ++i) { + ALIGNED(32) uint8_t before[33]; + memcpy(&before[1], src_ptr, 32); + before[0] = ref_ptr[ref_step * 0]; + before[8] = ref_ptr[ref_step * 1]; + before[16] = ref_ptr[ref_step * 2]; + before[24] = ref_ptr[ref_step * 3]; + + __m256i vbefore = _mm256_load_si256((__m256i*)before); + __m256i vbehind = _mm256_loadu_si256((__m256i*)src_ptr); + + __m256i vavg = _mm256_avg_epu8(vbefore, vbehind); + + __m256i vreslo = _mm256_unpacklo_epi8(vavg, vbehind); + __m256i vreshi = _mm256_unpackhi_epi8(vavg, vbehind); + + _mm_store_si128((__m128i*) & dst_ptr[dst_step * 0], _mm256_extracti128_si256(vreslo, 0)); + _mm_store_si128((__m128i*) & dst_ptr[dst_step * 1], _mm256_extracti128_si256(vreshi, 0)); + _mm_store_si128((__m128i*) & dst_ptr[dst_step * 2], _mm256_extracti128_si256(vreslo, 1)); + _mm_store_si128((__m128i*) & dst_ptr[dst_step * 3], _mm256_extracti128_si256(vreshi, 1)); + + src_ptr += 32; + dst_ptr += dst_step * 4; + ref_ptr += ref_step * 4; + } +} + +static void mip_upsampling_w16_ups4_hor_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref, const uint16_t dst_step, const uint8_t ref_step) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 4; // width / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + + const uvg_pixel* ref_ptr = ref + ref_step - 1; + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + int step = ref_step; + __m128i ones = _mm_set1_epi8(1); + __m128i threes = _mm_set1_epi8(3); + + __m256i permute_mask = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); + + // Assign references by hand after copying sources. This will avoid the use of inserts later. + // Before buffer length is 17 since we need to copy the reference value into the first index. + // Copying 16 samples is faster than copying 15. The first index of each 4 wide row will be replaced + // with a reference value. + ALIGNED(16) uint8_t before[17]; + memcpy(&before[1], src_ptr, 16); + before[0] = ref_ptr[ref_step * 0]; + before[4] = ref_ptr[ref_step * 1]; + before[8] = ref_ptr[ref_step * 2]; + before[12] = ref_ptr[ref_step * 3]; + + __m128i vbefore = _mm_load_si128((__m128i*)before); + __m128i vbehind = _mm_load_si128((__m128i*)src_ptr); + + // Permute the input values to get the result in correct order. + //vbefore = _mm256_permutevar8x32_epi32(vbefore, permute_mask); + //vbehind = _mm256_permutevar8x32_epi32(vbehind, permute_mask); + + // Calculate the 3 interpolated values between before and behind: middle, left and right. + __m128i vmiddle = _mm_avg_epu8(vbefore, vbehind); + __m128i vleft = _mm_avg_epu8(vmiddle, vbefore); + __m128i vright = _mm_avg_epu8(vmiddle, vbehind); + + // Calculate the two last bits of the difference between before and behind. These bits are used to determine if there will be rounding error. + // Rounding error occurs in the left interpolated value if the two last bits of the difference between before and behind are 0b01. + __m128i diff = _mm_sub_epi8(vbehind, vbefore); + diff = _mm_and_si128(diff, threes); + __m128i mask = _mm_cmpeq_epi8(diff, ones); // The rounding error mask will be generated based on the calculated last bits. + __m128i sub_amount = _mm_blendv_epi8(_mm_set1_epi8(0), ones, mask); + + vleft = _mm_sub_epi8(vleft, sub_amount); + + // Same rounding error handling for right interpolated values. 
+ // Error happens if the two last bits of the difference between before and behind is 0b11. + mask = _mm_cmpeq_epi8(diff, threes); + sub_amount = _mm_blendv_epi8(_mm_set1_epi8(0), ones, mask); + + vright = _mm_sub_epi8(vright, sub_amount); + + // Interleave results. + __m128i left_temp0 = _mm_unpacklo_epi8(vleft, vmiddle); + __m128i left_temp1 = _mm_unpackhi_epi8(vleft, vmiddle); + __m128i right_temp0 = _mm_unpacklo_epi8(vright, vbehind); + __m128i right_temp1 = _mm_unpackhi_epi8(vright, vbehind); + + __m128i vtmp0 = _mm_unpacklo_epi16(left_temp0, right_temp0); + __m128i vtmp1 = _mm_unpackhi_epi16(left_temp0, right_temp0); + __m128i vtmp2 = _mm_unpacklo_epi16(left_temp1, right_temp1); + __m128i vtmp3 = _mm_unpackhi_epi16(left_temp1, right_temp1); + + _mm_store_si128((__m128i*)(dst_ptr + dst_step * 0), vtmp0); + _mm_store_si128((__m128i*)(dst_ptr + dst_step * 1), vtmp1); + _mm_store_si128((__m128i*)(dst_ptr + dst_step * 2), vtmp2); + _mm_store_si128((__m128i*)(dst_ptr + dst_step * 3), vtmp3); +} + +static void mip_upsampling_w32_ups4_hor_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref, const uint16_t dst_step, const uint8_t ref_step) +{ + const uint8_t red_pred_size = 8; + const uint8_t ups_factor = 4; // width / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + + const uvg_pixel* ref_ptr = ref + ref_step - 1; + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + int step = ref_step; + __m256i ones = _mm256_set1_epi8(1); + __m256i threes = _mm256_set1_epi8(3); + + __m256i permute_mask = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); + + // This will process 4 rows at a time. Limit is always 8 rows. + for (int i = 0; i < 2; ++i) { + + // Assign references by hand after copying sources. This will avoid the use of inserts later. + // Before buffer length is 33 since we need to copy reference value into the first index. + // Copying 32 samples is faster than copying 31. First indices of each 8 wide row will be replaced + // with a reference value. + ALIGNED(32) uint8_t before[33]; + memcpy(&before[1], src_ptr, 32); + before[0] = ref_ptr[ref_step * 0]; + before[8] = ref_ptr[ref_step * 1]; + before[16] = ref_ptr[ref_step * 2]; + before[24] = ref_ptr[ref_step * 3]; + + + __m256i vbefore = _mm256_load_si256((__m256i*)before); + __m256i vbehind = _mm256_loadu_si256((__m256i*)src_ptr); + + // Permute the input values to get the result in correct order. + vbefore = _mm256_permutevar8x32_epi32(vbefore, permute_mask); + vbehind = _mm256_permutevar8x32_epi32(vbehind, permute_mask); + + // Calculate the 3 interpolated values between before and behind, middle, left and right. + __m256i vmiddle = _mm256_avg_epu8(vbefore, vbehind); + __m256i vleft = _mm256_avg_epu8(vmiddle, vbefore); + __m256i vright = _mm256_avg_epu8(vmiddle, vbehind); + + // Calculate the two last bits of difference between before and behind. These bits are used to determine if there will be rounding error. + // Rounding error occurs in the left interpolated value if the two last bits of the difference between before and behind is 0b01. + __m256i diff = _mm256_sub_epi8(vbehind, vbefore); + diff = _mm256_and_si256(diff, threes); + __m256i mask = _mm256_cmpeq_epi8(diff, ones); // The rounding error mask will be generated based on the calculated last bits. + __m256i sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); + + vleft = _mm256_sub_epi8(vleft, sub_amount); + + // Same rounding error handling for right interpolated values. 
+ // Error happens if the two last bits of the difference between before and behind is 0b11. + mask = _mm256_cmpeq_epi8(diff, threes); + sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); + + vright = _mm256_sub_epi8(vright, sub_amount); + + // Interleave results. + __m256i left_temp0 = _mm256_unpacklo_epi8(vleft, vmiddle); + __m256i left_temp1 = _mm256_unpackhi_epi8(vleft, vmiddle); + __m256i right_temp0 = _mm256_unpacklo_epi8(vright, vbehind); + __m256i right_temp1 = _mm256_unpackhi_epi8(vright, vbehind); + + __m256i vtmp0 = _mm256_unpacklo_epi16(left_temp0, right_temp0); + __m256i vtmp1 = _mm256_unpackhi_epi16(left_temp0, right_temp0); + __m256i vtmp2 = _mm256_unpacklo_epi16(left_temp1, right_temp1); + __m256i vtmp3 = _mm256_unpackhi_epi16(left_temp1, right_temp1); + + _mm256_storeu_si256((__m256i*)(dst_ptr + dst_step * 0), vtmp0); + _mm256_storeu_si256((__m256i*)(dst_ptr + dst_step * 1), vtmp1); + _mm256_storeu_si256((__m256i*)(dst_ptr + dst_step * 2), vtmp2); + _mm256_storeu_si256((__m256i*)(dst_ptr + dst_step * 3), vtmp3); + + src_ptr += 32; + ref_ptr += ref_step * 4; + dst_ptr += dst_step * 4; + } +} + +static void mip_upsampling_w32_ups8_hor_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref, const uint16_t dst_step, const uint8_t ref_step) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 8; // width / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + + __m128i vshufsrc = _mm_setr_epi8( + 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d + ); + + __m128i vshuf0 = _mm_setr_epi8( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01 + ); + + __m128i vshuf1 = _mm_setr_epi8( + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03 + ); + + __m128i vshuf2 = _mm_setr_epi8( + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05 + ); + + __m128i vshuf3 = _mm_setr_epi8( + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07 + ); + + __m128i vrnd = _mm_set1_epi16(rounding_offset); + __m128i vmul = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + + const uvg_pixel* ref_ptr = ref + ref_step - 1; + const uvg_pixel* src_ptr = src; + + int step = ref_step; + + uvg_pixel* dst_ptr = dst; + + for (int i = 0; i < 8; i++) { + // Handle input data + int16_t before = *ref_ptr; + __m128i vtmp = _mm_loadu_si128((__m128i*)src_ptr); + __m128i vbehind = _mm_cvtepu8_epi16(vtmp); + + __m128i vbefore = vbehind; + vbefore = _mm_shuffle_epi8(vbefore, vshufsrc); + vbefore = _mm_insert_epi16(vbefore, before, 0); + __m128i vbeforeshifted = _mm_slli_epi16(vbefore, log2_factor); + + __m128i vinterpolate = _mm_sub_epi16(vbehind, vbefore); + + // Calculate 1st 16 result chunk + __m128i vbefore0 = _mm_shuffle_epi8(vbeforeshifted, vshuf0); + __m128i vinterpolate0 = _mm_shuffle_epi8(vinterpolate, vshuf0); + + __m128i vmulres0 = _mm_mullo_epi16(vinterpolate0, vmul); + vmulres0 = _mm_add_epi16(vmulres0, vbefore0); + + vmulres0 = _mm_add_epi16(vmulres0, vrnd); + vmulres0 = _mm_srai_epi16(vmulres0, log2_factor); + + __m128i vbefore1 = _mm_shuffle_epi8(vbeforeshifted, vshuf1); + __m128i vinterpolate1 = _mm_shuffle_epi8(vinterpolate, vshuf1); + + __m128i vmulres1 = _mm_mullo_epi16(vinterpolate1, vmul); + vmulres1 = _mm_add_epi16(vmulres1, vbefore1); + + vmulres1 = 
_mm_add_epi16(vmulres1, vrnd); + vmulres1 = _mm_srai_epi16(vmulres1, log2_factor); + + __m128i vres = _mm_packus_epi16(vmulres0, vmulres1); + + _mm_store_si128((__m128i*)(dst_ptr + 0), vres); + + // Calculate 2nd 16 result chunk + vbefore0 = _mm_shuffle_epi8(vbeforeshifted, vshuf2); + vinterpolate0 = _mm_shuffle_epi8(vinterpolate, vshuf2); + + vmulres0 = _mm_mullo_epi16(vinterpolate0, vmul); + vmulres0 = _mm_add_epi16(vmulres0, vbefore0); + + vmulres0 = _mm_add_epi16(vmulres0, vrnd); + vmulres0 = _mm_srai_epi16(vmulres0, log2_factor); + + vbefore1 = _mm_shuffle_epi8(vbeforeshifted, vshuf3); + vinterpolate1 = _mm_shuffle_epi8(vinterpolate, vshuf3); + + vmulres1 = _mm_mullo_epi16(vinterpolate1, vmul); + vmulres1 = _mm_add_epi16(vmulres1, vbefore1); + + vmulres1 = _mm_add_epi16(vmulres1, vrnd); + vmulres1 = _mm_srai_epi16(vmulres1, log2_factor); + + vres = _mm_packus_epi16(vmulres0, vmulres1); + + _mm_store_si128((__m128i*)(dst_ptr + 16), vres); + + dst_ptr += dst_step; + ref_ptr += ref_step; + src_ptr += red_pred_size; + } +} + +static void mip_upsampling_w64_ups8_hor_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref, const uint16_t dst_step, const uint8_t ref_step) +{ + const uvg_pixel* ref_ptr = ref + ref_step - 1; + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + const __m256i ones = _mm256_set1_epi8(1); + const __m256i twos = _mm256_set1_epi8(2); + const __m256i threes = _mm256_set1_epi8(3); + const __m256i fours = _mm256_set1_epi8(4); + const __m256i fives = _mm256_set1_epi8(5); + const __m256i sixes = _mm256_set1_epi8(6); + const __m256i sevens = _mm256_set1_epi8(7); + const __m256i eights = _mm256_set1_epi8(8); + + __m256i shuffle_mask = _mm256_setr_epi8( + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f + ); + + + // This will process 4 rows at a time. The limit is always 8 rows. + for (int i = 0; i < 2; ++i) { + + // Assign references by hand after copying sources. This will avoid the use of inserts later. + ALIGNED(32) uint8_t before[33]; + memcpy(&before[1], src_ptr, 32); + before[0] = ref_ptr[ref_step * 0]; + before[8] = ref_ptr[ref_step * 1]; + before[16] = ref_ptr[ref_step * 2]; + before[24] = ref_ptr[ref_step * 3]; + + __m256i vbefore = _mm256_load_si256((__m256i*)before); + __m256i vbehind = _mm256_load_si256((__m256i*)src_ptr); + + // Permute the input values to get the result in correct order. + vbefore = _mm256_shuffle_epi8(vbefore, shuffle_mask); + vbehind = _mm256_shuffle_epi8(vbehind, shuffle_mask); + vbefore = _mm256_permute4x64_epi64(vbefore, _MM_SHUFFLE(3, 1, 2, 0)); + vbehind = _mm256_permute4x64_epi64(vbehind, _MM_SHUFFLE(3, 1, 2, 0)); + + // Calculate the 7 interpolated values between before and behind: the middle value and the left/right values around it. + __m256i vmiddle = _mm256_avg_epu8(vbefore, vbehind); + __m256i vleft_middle = _mm256_avg_epu8(vmiddle, vbefore); + __m256i vright_middle = _mm256_avg_epu8(vmiddle, vbehind); + __m256i vleft_left = _mm256_avg_epu8(vbefore, vleft_middle); + __m256i vleft_right = _mm256_avg_epu8(vleft_middle, vmiddle); + __m256i vright_left = _mm256_avg_epu8(vmiddle, vright_middle); + __m256i vright_right = _mm256_avg_epu8(vright_middle, vbehind); + + // Calculate the last three and last two bits of the difference between before and behind. These bits are used to determine if there will be rounding error. 
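/*
 * Why a correction is needed at all: _mm256_avg_epu8(a, b) computes
 * (a + b + 1) >> 1, so a cascade of averages can round one step too high
 * compared with exact linear interpolation. For the factor-4 case, with
 * d = behind - before:
 *
 *   cascaded left = before + ((((d + 1) >> 1) + 1) >> 1)
 *   exact    left = before + ((d + 2) >> 2)   // == (3*before + behind + 2) >> 2
 *
 * These differ by exactly 1 when (d & 3) == 1, which is the condition the
 * factor-4 kernels above test. The masks below extend the same idea to the
 * factor-8 cascade, using the last three bits of d to pick which of the seven
 * interpolated values need a -1 fix-up.
 */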
+ __m256i diff = _mm256_sub_epi8(vbehind, vbefore); + diff = _mm256_and_si256(diff, sevens); + __m256i three_diff = _mm256_and_si256(diff, threes); + + // Right side + __m256i mask = _mm256_cmpgt_epi8(diff, fours); // The rounding error mask will be generated based on the calculated last bits. + __m256i sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); // If 5, 6, 7 select one + vright_right = _mm256_sub_epi8(vright_right, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, threes); + sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); // If 3 or 7 select one + vright_middle = _mm256_sub_epi8(vright_middle, sub_amount); + + __m256i is_two = _mm256_cmpeq_epi8(diff, twos); + __m256i is_five = _mm256_cmpeq_epi8(diff, fives); + mask = _mm256_or_si256(mask, is_two); + mask = _mm256_or_si256(mask, is_five); + sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); // If 2, 3, 5, or 7 select one + vright_left = _mm256_sub_epi8(vright_left, sub_amount); + + // Left side + diff = _mm256_blendv_epi8(diff, eights, _mm256_cmpeq_epi8(_mm256_set1_epi8(0), diff)); // Replace zeros with eights to enable using GT + mask = _mm256_cmpgt_epi8(diff, threes); + sub_amount = _mm256_blendv_epi8(ones, _mm256_set1_epi8(0), mask); // If greater than three select zero + vleft_left = _mm256_sub_epi8(vleft_left, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, ones); + sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); // If 1 or 5 select one + vleft_middle = _mm256_sub_epi8(vleft_middle, sub_amount); + + __m256i is_three = _mm256_cmpeq_epi8(diff, threes); + __m256i is_six = _mm256_cmpeq_epi8(diff, sixes); + mask = _mm256_or_si256(mask, is_three); + mask = _mm256_or_si256(mask, is_six); + sub_amount = _mm256_blendv_epi8(_mm256_set1_epi8(0), ones, mask); // If 1, 3, 5, 6 select one + vleft_right = _mm256_sub_epi8(vleft_right, sub_amount); + + // Interleave results. 
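+    // The seven interpolated vectors plus vbehind each hold one of the eight output
+    // phases for every reduced sample. The epi8/epi16/epi32 unpack cascade below
+    // transposes them so that the eight pixels produced from one reduced sample end
+    // up contiguous in memory, giving finished 64-pixel rows for the stores.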
+ __m256i left_left_temp0 = _mm256_unpacklo_epi8(vleft_left, vleft_middle); + __m256i left_left_temp1 = _mm256_unpackhi_epi8(vleft_left, vleft_middle); + __m256i left_right_temp0 = _mm256_unpacklo_epi8(vleft_right, vmiddle); + __m256i left_right_temp1 = _mm256_unpackhi_epi8(vleft_right, vmiddle); + __m256i right_left_temp0 = _mm256_unpacklo_epi8(vright_left, vright_middle); + __m256i right_left_temp1 = _mm256_unpackhi_epi8(vright_left, vright_middle); + __m256i right_right_temp0 = _mm256_unpacklo_epi8(vright_right, vbehind); + __m256i right_right_temp1 = _mm256_unpackhi_epi8(vright_right, vbehind); + + __m256i vleft_temp0 = _mm256_unpacklo_epi16(left_left_temp0, left_right_temp0); + __m256i vleft_temp1 = _mm256_unpackhi_epi16(left_left_temp0, left_right_temp0); + __m256i vleft_temp2 = _mm256_unpacklo_epi16(left_left_temp1, left_right_temp1); + __m256i vleft_temp3 = _mm256_unpackhi_epi16(left_left_temp1, left_right_temp1); + __m256i vright_temp0 = _mm256_unpacklo_epi16(right_left_temp0, right_right_temp0); + __m256i vright_temp1 = _mm256_unpackhi_epi16(right_left_temp0, right_right_temp0); + __m256i vright_temp2 = _mm256_unpacklo_epi16(right_left_temp1, right_right_temp1); + __m256i vright_temp3 = _mm256_unpackhi_epi16(right_left_temp1, right_right_temp1); + + __m256i vtmp0 = _mm256_unpacklo_epi32(vleft_temp0, vright_temp0); + __m256i vtmp1 = _mm256_unpackhi_epi32(vleft_temp0, vright_temp0); + __m256i vtmp2 = _mm256_unpacklo_epi32(vleft_temp1, vright_temp1); + __m256i vtmp3 = _mm256_unpackhi_epi32(vleft_temp1, vright_temp1); + __m256i vtmp4 = _mm256_unpacklo_epi32(vleft_temp2, vright_temp2); + __m256i vtmp5 = _mm256_unpackhi_epi32(vleft_temp2, vright_temp2); + __m256i vtmp6 = _mm256_unpacklo_epi32(vleft_temp3, vright_temp3); + __m256i vtmp7 = _mm256_unpackhi_epi32(vleft_temp3, vright_temp3); + + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 0 + 00), vtmp0); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 0 + 32), vtmp1); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 1 + 00), vtmp2); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 1 + 32), vtmp3); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 2 + 00), vtmp4); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 2 + 32), vtmp5); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 3 + 00), vtmp6); + _mm256_store_si256((__m256i*)(dst_ptr + dst_step * 3 + 32), vtmp7); + + src_ptr += 32; + ref_ptr += ref_step * 4; + dst_ptr += dst_step * 4; + } +} + + + + +static void mip_upsampling_w4_ups2_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 2; // height / red_pred_size + + int32_t refline = *(int32_t*)ref; + __m128i vbehind = _mm_loadu_si128((__m128i*)src); + __m128i vbefore = vbehind; + + vbefore = _mm_slli_si128(vbefore, 4); // Shift left to make room for one 32-bit integer. This could be done with a shuffle, but there should be no performance difference. 
+ vbefore = _mm_insert_epi32(vbefore, refline, 0); + + __m128i vavg = _mm_avg_epu8(vbefore, vbehind); + + __m128i vres0 = _mm_unpacklo_epi32(vavg, vbehind); + __m128i vres1 = _mm_unpackhi_epi32(vavg, vbehind); + + _mm_store_si128((__m128i*)(dst + 0), vres0); + _mm_store_si128((__m128i*)(dst + 16), vres1); +} + +static void mip_upsampling_w4_ups4_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 4; // height / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + + __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + /*__m128i vshufbefore = _mm_setr_epi8( + 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b + );*/ + + __m256i vshufres = _mm256_setr_epi8( + 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f + ); + + int32_t refline = *(int32_t*)ref; + //__m128i vidx = _mm_setr_epi32(0, 32, 64, 96); + //__m128i vbehind = _mm_i32gather_epi32((const int*)src, vidx, 1); + __m128i vbehind = _mm_loadu_si128((__m128i*)src); + __m128i vbefore = vbehind; + + vbefore = _mm_slli_si128(vbefore, 4); // Shift left to make room for one 32-bit integer. This could be done with a shuffle, but there should be no performance difference. + vbefore = _mm_insert_epi32(vbefore, refline, 0); + + __m256i vbefore256 = _mm256_cvtepu8_epi16(vbefore); + __m256i vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + __m256i vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + __m256i vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + __m256i vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + __m256i vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + __m256i vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + + /*vrow0 = _mm256_add_epi16(vrow0, vrnd); + vrow1 = _mm256_add_epi16(vrow1, vrnd); + vrow2 = _mm256_add_epi16(vrow2, vrnd);*/ + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + + __m256i vres0 = _mm256_packus_epi16(vrow0, vrow1); + __m256i vres1 = _mm256_packus_epi16(vrow2, vbehind256); + + vres0 = _mm256_shuffle_epi8(vres0, vshufres); + vres1 = _mm256_shuffle_epi8(vres1, vshufres); + + __m256i vlo128 = _mm256_permute2x128_si256(vres0, vres1, 0x20); + __m256i vhi128 = _mm256_permute2x128_si256(vres0, vres1, 0x31); + + vres0 = _mm256_permute4x64_epi64(vlo128, _MM_SHUFFLE(3, 1, 2, 0)); + vres1 = _mm256_permute4x64_epi64(vhi128, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm256_storeu_si256((__m256i*)(dst + 0), vres0); + _mm256_storeu_si256((__m256i*)(dst + 32), vres1); + +} + +static void mip_upsampling_w4_ups8_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uint8_t red_pred_size = 4; + const uint8_t ups_factor = 8; // height / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + + __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + /*__m128i vshufbefore = _mm_setr_epi8( + 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b + );*/ + + __m256i vshufres = _mm256_setr_epi8( + 0x00, 0x01, 
0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f + ); + + int32_t refline = *(int32_t*)ref; + //__m128i vidx = _mm_setr_epi32(0, 32, 64, 96); + //__m128i vbehind = _mm_i32gather_epi32((const int*)src, vidx, 1); + __m128i vbehind = _mm_loadu_si128((__m128i*)src); + __m128i vbefore = vbehind; + + vbefore = _mm_slli_si128(vbefore, 4); // Shift left to make room for one 32-bit integer. This could be done with a shuffle, but there should be no performance difference. + vbefore = _mm_insert_epi32(vbefore, refline, 0); + + __m256i vbefore256 = _mm256_cvtepu8_epi16(vbefore); + __m256i vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + __m256i vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + __m256i vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + __m256i vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + __m256i vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + __m256i vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + __m256i vrow3 = _mm256_add_epi16(vrow2, vinterpolate); + __m256i vrow4 = _mm256_add_epi16(vrow3, vinterpolate); + __m256i vrow5 = _mm256_add_epi16(vrow4, vinterpolate); + __m256i vrow6 = _mm256_add_epi16(vrow5, vinterpolate); + + /* + vrow0 = _mm256_add_epi16(vrow0, vrnd); + vrow1 = _mm256_add_epi16(vrow1, vrnd); + vrow2 = _mm256_add_epi16(vrow2, vrnd); + vrow3 = _mm256_add_epi16(vrow3, vrnd); + vrow4 = _mm256_add_epi16(vrow4, vrnd); + vrow5 = _mm256_add_epi16(vrow5, vrnd); + vrow6 = _mm256_add_epi16(vrow6, vrnd);*/ + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + vrow3 = _mm256_srai_epi16(vrow3, log2_factor); + vrow4 = _mm256_srai_epi16(vrow4, log2_factor); + vrow5 = _mm256_srai_epi16(vrow5, log2_factor); + vrow6 = _mm256_srai_epi16(vrow6, log2_factor); + + __m256i vres0 = _mm256_packus_epi16(vrow0, vrow1); + __m256i vres1 = _mm256_packus_epi16(vrow2, vrow3); + __m256i vres2 = _mm256_packus_epi16(vrow4, vrow5); + __m256i vres3 = _mm256_packus_epi16(vrow6, vbehind256); + + vres0 = _mm256_shuffle_epi8(vres0, vshufres); + vres1 = _mm256_shuffle_epi8(vres1, vshufres); + vres2 = _mm256_shuffle_epi8(vres2, vshufres); + vres3 = _mm256_shuffle_epi8(vres3, vshufres); + + __m256i vupklo0 = _mm256_unpacklo_epi64(vres0, vres1); + __m256i vupklo1 = _mm256_unpacklo_epi64(vres2, vres3); + __m256i vupkhi0 = _mm256_unpackhi_epi64(vres0, vres1); + __m256i vupkhi1 = _mm256_unpackhi_epi64(vres2, vres3); + + vres0 = _mm256_permute2x128_si256(vupklo0, vupklo1, 0x20); + vres1 = _mm256_permute2x128_si256(vupkhi0, vupkhi1, 0x20); + vres2 = _mm256_permute2x128_si256(vupklo0, vupklo1, 0x31); + vres3 = _mm256_permute2x128_si256(vupkhi0, vupkhi1, 0x31); + + _mm256_store_si256((__m256i*)(dst + 0), vres0); + _mm256_store_si256((__m256i*)(dst + 32), vres1); + _mm256_store_si256((__m256i*)(dst + 64), vres2); + _mm256_store_si256((__m256i*)(dst + 96), vres3); +} + + +static void mip_upsampling_w8_ups2_h8_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + int64_t refline = *(int64_t*)ref; + __m128i vidx0 = _mm_set_epi64x(16, 0); + __m128i vidx1 = _mm_set_epi64x(32, 16); + __m128i vidx2 = _mm_set_epi64x(48, 32); + + __m128i vbehind0 = _mm_i64gather_epi64((const long long*)src, vidx0, 1); + __m128i vbefore1 = 
_mm_i64gather_epi64((const long long*)src, vidx1, 1); + __m128i vbehind1 = _mm_i64gather_epi64((const long long*)src, vidx2, 1); + + __m128i vbefore0 = vbehind0; + vbefore0 = _mm_slli_si128(vbefore0, 8); // Shift left to make room for one 64-bit integer. This could be done with a shuffle, but there should be no performance difference. + vbefore0 = _mm_insert_epi64(vbefore0, refline, 0); + + __m128i vavg0 = _mm_avg_epu8(vbefore0, vbehind0); + __m128i vavg1 = _mm_avg_epu8(vbefore1, vbehind1); + + __m128i vres0 = _mm_unpacklo_epi64(vavg0, vbehind0); + __m128i vres1 = _mm_unpackhi_epi64(vavg0, vbehind0); + __m128i vres2 = _mm_unpacklo_epi64(vavg1, vbehind1); + __m128i vres3 = _mm_unpackhi_epi64(vavg1, vbehind1); + + _mm_store_si128((__m128i*)(dst + 0), vres0); + _mm_store_si128((__m128i*)(dst + 16), vres1); + _mm_store_si128((__m128i*)(dst + 32), vres2); + _mm_store_si128((__m128i*)(dst + 48), vres3); +} + +static void mip_upsampling_w8_ups2_h16_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + int64_t refline = *(int64_t*)ref; + __m128i vbehind0 = _mm_load_si128((__m128i*)(src + 0)); + __m128i vbefore1 = _mm_loadu_si128((__m128i*)(src + 8)); + __m128i vbehind1 = _mm_load_si128((__m128i*)(src + 16)); + + __m128i vbefore0 = vbehind0; + vbefore0 = _mm_slli_si128(vbefore0, 8); // Shift left to make room for one 64-bit integer. This could be done with a shuffle, but there should be no performance difference. + vbefore0 = _mm_insert_epi64(vbefore0, refline, 0); + + __m128i vavg0 = _mm_avg_epu8(vbefore0, vbehind0); + __m128i vavg1 = _mm_avg_epu8(vbefore1, vbehind1); + + __m128i vres0 = _mm_unpacklo_epi64(vavg0, vbehind0); + __m128i vres1 = _mm_unpackhi_epi64(vavg0, vbehind0); + __m128i vres2 = _mm_unpacklo_epi64(vavg1, vbehind1); + __m128i vres3 = _mm_unpackhi_epi64(vavg1, vbehind1); + + _mm_store_si128((__m128i*)(dst + 0), vres0); + _mm_store_si128((__m128i*)(dst + 16), vres1); + _mm_store_si128((__m128i*)(dst + 32), vres2); + _mm_store_si128((__m128i*)(dst + 48), vres3); + + vbefore0 = _mm_loadu_si128((__m128i*)(src + 24)); + vbehind0 = _mm_load_si128((__m128i*)(src + 32)); + vbefore1 = _mm_loadu_si128((__m128i*)(src + 40)); + vbehind1 = _mm_load_si128((__m128i*)(src + 48)); + + vavg0 = _mm_avg_epu8(vbefore0, vbehind0); + vavg1 = _mm_avg_epu8(vbefore1, vbehind1); + + vres0 = _mm_unpacklo_epi64(vavg0, vbehind0); + vres1 = _mm_unpackhi_epi64(vavg0, vbehind0); + vres2 = _mm_unpacklo_epi64(vavg1, vbehind1); + vres3 = _mm_unpackhi_epi64(vavg1, vbehind1); + + _mm_store_si128((__m128i*)(dst + 64), vres0); + _mm_store_si128((__m128i*)(dst + 80), vres1); + _mm_store_si128((__m128i*)(dst + 96), vres2); + _mm_store_si128((__m128i*)(dst + 112), vres3); +} + +static void mip_upsampling_w8_ups4_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uint8_t red_pred_size = 8; + const uint8_t ups_factor = 4; // height / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + int64_t refline = *(int64_t*)ref; + __m128i vbehind = _mm_loadu_si128((__m128i*)src); + __m128i vbefore = vbehind; + + vbefore = _mm_slli_si128(vbefore, 8); // Shift left to make room for one 64-bit integer. This could be done with a shuffle, but there should be no performance difference. 
+ vbefore = _mm_insert_epi64(vbefore, refline, 0); + + __m256i vbefore256 = _mm256_cvtepu8_epi16(vbefore); + __m256i vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + __m256i vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + __m256i vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + __m256i vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + __m256i vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + __m256i vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + + __m256i vres0 = _mm256_packus_epi16(vrow0, vrow1); + __m256i vres1 = _mm256_packus_epi16(vrow2, vbehind256); + + __m256i vlo128 = _mm256_permute2x128_si256(vres0, vres1, 0x20); + __m256i vhi128 = _mm256_permute2x128_si256(vres0, vres1, 0x31); + + _mm256_store_si256((__m256i*)(dst + 0), vlo128); + _mm256_store_si256((__m256i*)(dst + 32), vhi128); + + for (int i = 1; i < 4; ++i) { + vbefore = _mm_loadu_si128((__m128i*)(src + (i * 16 - 8))); + vbehind = _mm_loadu_si128((__m128i*)(src + (i * 16))); + vbefore256 = _mm256_cvtepu8_epi16(vbefore); + vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + + vres0 = _mm256_packus_epi16(vrow0, vrow1); + vres1 = _mm256_packus_epi16(vrow2, vbehind256); + + vlo128 = _mm256_permute2x128_si256(vres0, vres1, 0x20); + vhi128 = _mm256_permute2x128_si256(vres0, vres1, 0x31); + + _mm256_store_si256((__m256i*)(dst + (i * 64) + 0), vlo128); + _mm256_store_si256((__m256i*)(dst + (i * 64) + 32), vhi128); + } +} + +static void mip_upsampling_w8_ups8_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uint8_t red_pred_size = 8; + const uint8_t ups_factor = 8; // height / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + int64_t refline = *(int64_t*)ref; + __m128i vbehind = _mm_loadu_si128((__m128i*)src); + __m128i vbefore = vbehind; + + vbefore = _mm_slli_si128(vbefore, 8); // Shift left to make room for one 64-bit integer. This could be done with a shuffle, but there should be no performance difference. 
+ vbefore = _mm_insert_epi64(vbefore, refline, 0); + + __m256i vbefore256 = _mm256_cvtepu8_epi16(vbefore); + __m256i vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + __m256i vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + __m256i vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + __m256i vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + __m256i vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + __m256i vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + __m256i vrow3 = _mm256_add_epi16(vrow2, vinterpolate); + __m256i vrow4 = _mm256_add_epi16(vrow3, vinterpolate); + __m256i vrow5 = _mm256_add_epi16(vrow4, vinterpolate); + __m256i vrow6 = _mm256_add_epi16(vrow5, vinterpolate); + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + vrow3 = _mm256_srai_epi16(vrow3, log2_factor); + vrow4 = _mm256_srai_epi16(vrow4, log2_factor); + vrow5 = _mm256_srai_epi16(vrow5, log2_factor); + vrow6 = _mm256_srai_epi16(vrow6, log2_factor); + + __m256i vres0 = _mm256_packus_epi16(vrow0, vrow1); + __m256i vres1 = _mm256_packus_epi16(vrow2, vrow3); + __m256i vres2 = _mm256_packus_epi16(vrow4, vrow5); + __m256i vres3 = _mm256_packus_epi16(vrow6, vbehind256); + + __m256i vlo128a = _mm256_permute2x128_si256(vres0, vres1, 0x20); + __m256i vlo128b = _mm256_permute2x128_si256(vres2, vres3, 0x20); + __m256i vhi128a = _mm256_permute2x128_si256(vres0, vres1, 0x31); + __m256i vhi128b = _mm256_permute2x128_si256(vres2, vres3, 0x31); + + _mm256_store_si256((__m256i*)(dst + 0), vlo128a); + _mm256_store_si256((__m256i*)(dst + 32), vlo128b); + _mm256_store_si256((__m256i*)(dst + 64), vhi128a); + _mm256_store_si256((__m256i*)(dst + 96), vhi128b); + + for (int i = 1; i < 4; ++i) { + vbefore = _mm_loadu_si128((__m128i*)(src + (i * 16 - 8))); + vbehind = _mm_loadu_si128((__m128i*)(src + (i * 16))); + vbefore256 = _mm256_cvtepu8_epi16(vbefore); + vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + vrow3 = _mm256_add_epi16(vrow2, vinterpolate); + vrow4 = _mm256_add_epi16(vrow3, vinterpolate); + vrow5 = _mm256_add_epi16(vrow4, vinterpolate); + vrow6 = _mm256_add_epi16(vrow5, vinterpolate); + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + vrow3 = _mm256_srai_epi16(vrow3, log2_factor); + vrow4 = _mm256_srai_epi16(vrow4, log2_factor); + vrow5 = _mm256_srai_epi16(vrow5, log2_factor); + vrow6 = _mm256_srai_epi16(vrow6, log2_factor); + + vres0 = _mm256_packus_epi16(vrow0, vrow1); + vres1 = _mm256_packus_epi16(vrow2, vrow3); + vres2 = _mm256_packus_epi16(vrow4, vrow5); + vres3 = _mm256_packus_epi16(vrow6, vbehind256); + + vlo128a = _mm256_permute2x128_si256(vres0, vres1, 0x20); + vlo128b = _mm256_permute2x128_si256(vres2, vres3, 0x20); + vhi128a = _mm256_permute2x128_si256(vres0, vres1, 0x31); + vhi128b = _mm256_permute2x128_si256(vres2, vres3, 0x31); + + _mm256_store_si256((__m256i*)(dst + (i * 128) + 0), vlo128a); + _mm256_store_si256((__m256i*)(dst + (i * 128) + 32), 
vlo128b);
+    _mm256_store_si256((__m256i*)(dst + (i * 128) + 64), vhi128a);
+    _mm256_store_si256((__m256i*)(dst + (i * 128) + 96), vhi128b);
+  }
+}
+
+
+static void mip_upsampling_w16_ups2_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref)
+{
+  __m128i vbehind0 = _mm_loadu_si128((__m128i*)(src + 0));
+  __m128i vbehind1 = _mm_loadu_si128((__m128i*)(src + 32));
+  __m128i vbehind2 = _mm_loadu_si128((__m128i*)(src + 64));
+  __m128i vbehind3 = _mm_loadu_si128((__m128i*)(src + 96));
+  __m128i vbehind4 = _mm_loadu_si128((__m128i*)(src + 128));
+  __m128i vbehind5 = _mm_loadu_si128((__m128i*)(src + 160));
+  __m128i vbehind6 = _mm_loadu_si128((__m128i*)(src + 192));
+  __m128i vbehind7 = _mm_loadu_si128((__m128i*)(src + 224));
+
+  __m128i vbefore0 = _mm_loadu_si128((__m128i*)ref);
+  __m128i vbefore1 = vbehind0;
+  __m128i vbefore2 = vbehind1;
+  __m128i vbefore3 = vbehind2;
+  __m128i vbefore4 = vbehind3;
+  __m128i vbefore5 = vbehind4;
+  __m128i vbefore6 = vbehind5;
+  __m128i vbefore7 = vbehind6;
+
+  __m128i vavg0 = _mm_avg_epu8(vbefore0, vbehind0);
+  __m128i vavg1 = _mm_avg_epu8(vbefore1, vbehind1);
+  __m128i vavg2 = _mm_avg_epu8(vbefore2, vbehind2);
+  __m128i vavg3 = _mm_avg_epu8(vbefore3, vbehind3);
+  __m128i vavg4 = _mm_avg_epu8(vbefore4, vbehind4);
+  __m128i vavg5 = _mm_avg_epu8(vbefore5, vbehind5);
+  __m128i vavg6 = _mm_avg_epu8(vbefore6, vbehind6);
+  __m128i vavg7 = _mm_avg_epu8(vbefore7, vbehind7);
+
+  _mm_store_si128((__m128i*)(dst + 0), vavg0);
+  _mm_store_si128((__m128i*)(dst + 32), vavg1);
+  _mm_store_si128((__m128i*)(dst + 64), vavg2);
+  _mm_store_si128((__m128i*)(dst + 96), vavg3);
+  _mm_store_si128((__m128i*)(dst + 128), vavg4);
+  _mm_store_si128((__m128i*)(dst + 160), vavg5);
+  _mm_store_si128((__m128i*)(dst + 192), vavg6);
+  _mm_store_si128((__m128i*)(dst + 224), vavg7);
+}
+
+static void mip_upsampling_w16_ups4_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref)
+{
+  const uvg_pixel* src_ptr = src;
+  const uvg_pixel* dst_ptr = dst;
+
+  __m128i vbefore = _mm_load_si128((__m128i*)ref);
+
+  const __m128i zeros = _mm_setzero_si128();
+  const __m128i ones = _mm_set1_epi8(1);
+  const __m128i threes = _mm_set1_epi8(3);
+
+  for (int i = 0; i < 8; ++i) {
+    __m128i vbehind = _mm_load_si128((__m128i*)src_ptr);
+
+    // Calculate the 3 interpolated lines between before and behind: top row, middle row and bottom row.
+    __m128i vmiddle = _mm_avg_epu8(vbefore, vbehind);
+    __m128i vtop = _mm_avg_epu8(vbefore, vmiddle);
+    __m128i vbottom = _mm_avg_epu8(vmiddle, vbehind);
+
+    // Calculate the last two bits of the difference between before and behind. These bits are used to determine if there will be rounding error.
+    // Rounding error occurs in the top interpolated value if the last two bits of the difference between before and behind are 0b01.
+    __m128i diff = _mm_sub_epi8(vbehind, vbefore);
+    diff = _mm_and_si128(diff, threes);
+    __m128i mask = _mm_cmpeq_epi8(diff, ones); // The rounding error mask will be generated based on the calculated last bits.
+    __m128i sub_amount = _mm_blendv_epi8(zeros, ones, mask);
+
+    vtop = _mm_sub_epi8(vtop, sub_amount);
+
+    // Same rounding error handling for the bottom interpolated values.
+    // Error happens if the last two bits of the difference between before and behind are 0b11.
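+    // For reference, the exact values are
+    //   top    = (3 * before +     behind + 2) >> 2
+    //   middle = (    before +     behind + 1) >> 1  (always exact via avg_epu8)
+    //   bottom = (    before + 3 * behind + 2) >> 2
+    // so only the top and bottom rows can over-round, and only by one, when
+    // (behind - before) & 3 equals 0b01 (top) or 0b11 (bottom).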
+ mask = _mm_cmpeq_epi8(diff, threes); + sub_amount = _mm_blendv_epi8(zeros, ones, mask); + + vbottom = _mm_sub_epi8(vbottom, sub_amount); + + // Store results + _mm_store_si128((__m128i*)(dst_ptr + 0), vtop); + _mm_store_si128((__m128i*)(dst_ptr + 16), vmiddle); + _mm_store_si128((__m128i*)(dst_ptr + 32), vbottom); + + vbefore = vbehind; + src_ptr += 64; + dst_ptr += 64; + } +} + +static void mip_upsampling_w16_ups8_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uint8_t red_pred_size = 8; + const uint8_t ups_factor = 8; // height / red_pred_size + + const int log2_factor = uvg_g_convert_to_log2[ups_factor]; + const int rounding_offset = 1 << (log2_factor - 1); + __m256i vrnd = _mm256_set1_epi16(rounding_offset); + + __m256i vbefore256; + __m256i vbehind256; + + __m128i vbefore = _mm_load_si128((__m128i*)ref); + vbefore256 = _mm256_cvtepu8_epi16(vbefore); + + for (int i = 0; i < 8; ++i) { + __m128i vbehind = _mm_loadu_si128((__m128i*)(src + (i * 128))); + vbehind256 = _mm256_cvtepu8_epi16(vbehind); + + __m256i vbeforeshifted = _mm256_slli_epi16(vbefore256, log2_factor); + + // Add rounding offset + vbeforeshifted = _mm256_add_epi16(vbeforeshifted, vrnd); + + __m256i vinterpolate = _mm256_sub_epi16(vbehind256, vbefore256); + + __m256i vrow0 = _mm256_add_epi16(vbeforeshifted, vinterpolate); + __m256i vrow1 = _mm256_add_epi16(vrow0, vinterpolate); + __m256i vrow2 = _mm256_add_epi16(vrow1, vinterpolate); + __m256i vrow3 = _mm256_add_epi16(vrow2, vinterpolate); + __m256i vrow4 = _mm256_add_epi16(vrow3, vinterpolate); + __m256i vrow5 = _mm256_add_epi16(vrow4, vinterpolate); + __m256i vrow6 = _mm256_add_epi16(vrow5, vinterpolate); + + vrow0 = _mm256_srai_epi16(vrow0, log2_factor); + vrow1 = _mm256_srai_epi16(vrow1, log2_factor); + vrow2 = _mm256_srai_epi16(vrow2, log2_factor); + vrow3 = _mm256_srai_epi16(vrow3, log2_factor); + vrow4 = _mm256_srai_epi16(vrow4, log2_factor); + vrow5 = _mm256_srai_epi16(vrow5, log2_factor); + vrow6 = _mm256_srai_epi16(vrow6, log2_factor); + + __m256i vres0 = _mm256_packus_epi16(vrow0, vrow1); + __m256i vres1 = _mm256_packus_epi16(vrow2, vrow3); + __m256i vres2 = _mm256_packus_epi16(vrow4, vrow5); + __m256i vres3 = _mm256_packus_epi16(vrow6, vbehind256); + + vres0 = _mm256_permute4x64_epi64(vres0, _MM_SHUFFLE(3, 1, 2, 0)); + vres1 = _mm256_permute4x64_epi64(vres1, _MM_SHUFFLE(3, 1, 2, 0)); + vres2 = _mm256_permute4x64_epi64(vres2, _MM_SHUFFLE(3, 1, 2, 0)); + vres3 = _mm256_permute4x64_epi64(vres3, _MM_SHUFFLE(3, 1, 2, 0)); + + _mm256_store_si256((__m256i*)(dst + (i * 128) + 0), vres0); + _mm256_store_si256((__m256i*)(dst + (i * 128) + 32), vres1); + _mm256_store_si256((__m256i*)(dst + (i * 128) + 64), vres2); + _mm256_store_si256((__m256i*)(dst + (i * 128) + 96), vres3); + + vbefore256 = vbehind256; + } +} + + +static void mip_upsampling_w32_ups2_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + __m256i vbefore = _mm256_loadu_si256((__m256i*)ref); + + for (int i = 0; i < 8; ++i) { + __m256i vbehind = _mm256_loadu_si256((__m256i*)(src + (i * 64))); + __m256i vavg = _mm256_avg_epu8(vbefore, vbehind); + + _mm256_storeu_si256((__m256i*)(dst + (i * 64)), vavg); + + vbefore = vbehind; + } +} + +static void mip_upsampling_w32_ups4_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + __m256i vbefore = _mm256_loadu_si256((__m256i*)ref); + + const __m256i zeros = 
_mm256_setzero_si256();
+  const __m256i ones = _mm256_set1_epi8(1);
+  const __m256i threes = _mm256_set1_epi8(3);
+
+  for (int i = 0; i < 8; ++i) {
+    __m256i vbehind = _mm256_loadu_si256((__m256i*)src_ptr);
+
+    // Calculate the 3 interpolated lines between before and behind: top row, middle row and bottom row.
+    __m256i vmiddle = _mm256_avg_epu8(vbefore, vbehind);
+    __m256i vtop = _mm256_avg_epu8(vbefore, vmiddle);
+    __m256i vbottom = _mm256_avg_epu8(vmiddle, vbehind);
+
+    // Calculate the last two bits of the difference between before and behind. These bits are used to determine if there will be rounding error.
+    // Rounding error occurs in the top interpolated value if the last two bits of the difference between before and behind are 0b01.
+    __m256i diff = _mm256_sub_epi8(vbehind, vbefore);
+    diff = _mm256_and_si256(diff, threes);
+    __m256i mask = _mm256_cmpeq_epi8(diff, ones); // The rounding error mask will be generated based on the calculated last bits.
+    __m256i sub_amount = _mm256_blendv_epi8(zeros, ones, mask);
+
+    vtop = _mm256_sub_epi8(vtop, sub_amount);
+
+    // Same rounding error handling for the bottom interpolated values.
+    // Error happens if the last two bits of the difference between before and behind are 0b11.
+    mask = _mm256_cmpeq_epi8(diff, threes);
+    sub_amount = _mm256_blendv_epi8(zeros, ones, mask);
+
+    vbottom = _mm256_sub_epi8(vbottom, sub_amount);
+
+    // Store results
+    _mm256_storeu_si256((__m256i*)(dst_ptr + 0), vtop);
+    _mm256_storeu_si256((__m256i*)(dst_ptr + 32), vmiddle);
+    _mm256_storeu_si256((__m256i*)(dst_ptr + 64), vbottom);
+
+    vbefore = vbehind;
+    src_ptr += 128;
+    dst_ptr += 128;
+  }
+}
+
+static void mip_upsampling_w32_ups8_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref)
+{
+  const uvg_pixel* src_ptr = src;
+  const uvg_pixel* dst_ptr = dst;
+
+  const __m256i zeros = _mm256_setzero_si256();
+  const __m256i ones = _mm256_set1_epi8(1);
+  const __m256i twos = _mm256_set1_epi8(2);
+  const __m256i threes = _mm256_set1_epi8(3);
+  const __m256i fours = _mm256_set1_epi8(4);
+  const __m256i fives = _mm256_set1_epi8(5);
+  const __m256i sixes = _mm256_set1_epi8(6);
+  const __m256i sevens = _mm256_set1_epi8(7);
+  const __m256i eights = _mm256_set1_epi8(8);
+
+  __m256i vbefore = _mm256_load_si256((__m256i*)(ref + 0));
+
+  for (int i = 0; i < 8; ++i) {
+    __m256i vbehind = _mm256_load_si256((__m256i*)src_ptr);
+
+    // Calculate the 7 interpolated lines between before and behind. Ordered by number from top to bottom.
+    __m256i vrow3 = _mm256_avg_epu8(vbefore, vbehind); // Middle
+    __m256i vrow1 = _mm256_avg_epu8(vrow3, vbefore);   // Top middle
+    __m256i vrow5 = _mm256_avg_epu8(vrow3, vbehind);   // Bottom middle
+    __m256i vrow0 = _mm256_avg_epu8(vbefore, vrow1);   // Top middle top
+    __m256i vrow2 = _mm256_avg_epu8(vrow1, vrow3);     // Top middle bottom
+    __m256i vrow4 = _mm256_avg_epu8(vrow3, vrow5);     // Bottom middle top
+    __m256i vrow6 = _mm256_avg_epu8(vrow5, vbehind);   // Bottom middle bottom
+
+    // Calculate the last three and last two bits of the difference between before and behind. These bits are used to determine if there will be rounding error.
+    __m256i diff = _mm256_sub_epi8(vbehind, vbefore);
+    diff = _mm256_and_si256(diff, sevens);
+    __m256i three_diff = _mm256_and_si256(diff, threes);
+
+    // Bottom side
+    __m256i mask = _mm256_cmpgt_epi8(diff, fours); // The rounding error mask will be generated based on the calculated last bits.
+ __m256i sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 5, 6, 7 select one + vrow6 = _mm256_sub_epi8(vrow6, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, threes); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 3 or 7 select one + vrow5 = _mm256_sub_epi8(vrow5, sub_amount); + + __m256i is_two = _mm256_cmpeq_epi8(diff, twos); + __m256i is_five = _mm256_cmpeq_epi8(diff, fives); + mask = _mm256_or_si256(mask, is_two); + mask = _mm256_or_si256(mask, is_five); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 2, 3, 5, or 7 select one + vrow4 = _mm256_sub_epi8(vrow4, sub_amount); + + // Top side + diff = _mm256_blendv_epi8(diff, eights, _mm256_cmpeq_epi8(zeros, diff)); // Replace zeros with eights to enable using GT + mask = _mm256_cmpgt_epi8(diff, threes); + sub_amount = _mm256_blendv_epi8(ones, zeros, mask); // If greater than three select zero + vrow0 = _mm256_sub_epi8(vrow0, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, ones); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 1 or 5 select one + vrow1 = _mm256_sub_epi8(vrow1, sub_amount); + + __m256i is_three = _mm256_cmpeq_epi8(diff, threes); + __m256i is_six = _mm256_cmpeq_epi8(diff, sixes); + mask = _mm256_or_si256(mask, is_three); + mask = _mm256_or_si256(mask, is_six); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 1, 3, 5, 6 select one + vrow2 = _mm256_sub_epi8(vrow2, sub_amount); + + // Store results + _mm256_store_si256((__m256i*)(dst_ptr + 0), vrow0); + _mm256_store_si256((__m256i*)(dst_ptr + 32), vrow1); + _mm256_store_si256((__m256i*)(dst_ptr + 64), vrow2); + _mm256_store_si256((__m256i*)(dst_ptr + 96), vrow3); + _mm256_store_si256((__m256i*)(dst_ptr + 128), vrow4); + _mm256_store_si256((__m256i*)(dst_ptr + 160), vrow5); + _mm256_store_si256((__m256i*)(dst_ptr + 192), vrow6); + + vbefore = vbehind; + src_ptr += 256; + dst_ptr += 256; + } +} + + +static void mip_upsampling_w64_ups2_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + __m256i vbeforeleft = _mm256_load_si256((__m256i*)(ref + 0)); + __m256i vbeforeright = _mm256_load_si256((__m256i*)(ref + 32)); + + for (int i = 0; i < 8; ++i) { + __m256i vbehindleft = _mm256_load_si256((__m256i*)(src + (i * 128) + 0)); + __m256i vbehindright = _mm256_load_si256((__m256i*)(src + (i * 128) + 32)); + __m256i vavgleft = _mm256_avg_epu8(vbeforeleft, vbehindleft); + __m256i vavgright = _mm256_avg_epu8(vbeforeright, vbehindright); + + _mm256_store_si256((__m256i*)(dst + (i * 128) + 0), vavgleft); + _mm256_store_si256((__m256i*)(dst + (i * 128) + 32), vavgright); + + vbeforeleft = vbehindleft; + vbeforeright = vbehindright; + } +} + +static void mip_upsampling_w64_ups4_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + __m256i vbeforeleft = _mm256_load_si256((__m256i*)(ref + 0)); + __m256i vbeforeright = _mm256_load_si256((__m256i*)(ref + 32)); + + const __m256i zeros = _mm256_setzero_si256(); + const __m256i ones = _mm256_set1_epi8(1); + const __m256i threes = _mm256_set1_epi8(3); + + for (int i = 0; i < 8; ++i) { + // Calculate 4 lines at a time + __m256i vbehindleft = _mm256_load_si256((__m256i*)(src_ptr + 0)); + __m256i vbehindright = _mm256_load_si256((__m256i*)(src_ptr + 32)); + + // Calculate left side of 64 wide lane + // Calculate the 3 interpolated lines between before and behind. Top row, middle row and bottom row. 
+    __m256i vmiddleleft = _mm256_avg_epu8(vbeforeleft, vbehindleft);
+    __m256i vtopleft = _mm256_avg_epu8(vbeforeleft, vmiddleleft);
+    __m256i vbottomleft = _mm256_avg_epu8(vmiddleleft, vbehindleft);
+
+    // Calculate the last two bits of the difference between before and behind. These bits are used to determine if there will be rounding error.
+    // Rounding error occurs in the top interpolated value if the last two bits of the difference between before and behind are 0b01.
+    __m256i diff = _mm256_sub_epi8(vbehindleft, vbeforeleft);
+    diff = _mm256_and_si256(diff, threes);
+    __m256i mask = _mm256_cmpeq_epi8(diff, ones); // The rounding error mask will be generated based on the calculated last bits.
+    __m256i sub_amount = _mm256_blendv_epi8(zeros, ones, mask);
+
+    vtopleft = _mm256_sub_epi8(vtopleft, sub_amount);
+
+    // Same rounding error handling for the bottom interpolated values.
+    // Error happens if the last two bits of the difference between before and behind are 0b11.
+    mask = _mm256_cmpeq_epi8(diff, threes);
+    sub_amount = _mm256_blendv_epi8(zeros, ones, mask);
+
+    vbottomleft = _mm256_sub_epi8(vbottomleft, sub_amount);
+
+
+    // Calculate right side of 64 wide lane
+    // Calculate the 3 interpolated lines between before and behind: top row, middle row and bottom row.
+    __m256i vmiddleright = _mm256_avg_epu8(vbeforeright, vbehindright);
+    __m256i vtopright = _mm256_avg_epu8(vbeforeright, vmiddleright);
+    __m256i vbottomright = _mm256_avg_epu8(vmiddleright, vbehindright);
+
+    // Calculate the last two bits of the difference between before and behind. These bits are used to determine if there will be rounding error.
+    // Rounding error occurs in the top interpolated value if the last two bits of the difference between before and behind are 0b01.
+    diff = _mm256_sub_epi8(vbehindright, vbeforeright);
+    diff = _mm256_and_si256(diff, threes);
+    mask = _mm256_cmpeq_epi8(diff, ones); // The rounding error mask will be generated based on the calculated last bits.
+    sub_amount = _mm256_blendv_epi8(zeros, ones, mask);
+
+    vtopright = _mm256_sub_epi8(vtopright, sub_amount);
+
+    // Same rounding error handling for the bottom interpolated values.
+    // Error happens if the last two bits of the difference between before and behind are 0b11.
+    mask = _mm256_cmpeq_epi8(diff, threes);
+    sub_amount = _mm256_blendv_epi8(zeros, ones, mask);
+
+    vbottomright = _mm256_sub_epi8(vbottomright, sub_amount);
+
+    // Store results
+    _mm256_store_si256((__m256i*)(dst_ptr + 0), vtopleft);
+    _mm256_store_si256((__m256i*)(dst_ptr + 32), vtopright);
+    _mm256_store_si256((__m256i*)(dst_ptr + 64), vmiddleleft);
+    _mm256_store_si256((__m256i*)(dst_ptr + 96), vmiddleright);
+    _mm256_store_si256((__m256i*)(dst_ptr + 128), vbottomleft);
+    _mm256_store_si256((__m256i*)(dst_ptr + 160), vbottomright);
+    // No need to store the last line of the 4 lines as it is already present in the result array and was not modified in any way.
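+    // (The 'behind' rows already sit in the destination: mip_predict_avx2 writes the
+    // horizontally upsampled rows into the result buffer at a stride of ups_ver_factor
+    // lines, so every fourth line of each group is the unmodified source line itself.)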
+ + vbeforeleft = vbehindleft; + vbeforeright = vbehindright; + + dst_ptr += 256; + src_ptr += 256; + } +} + +static void mip_upsampling_w64_ups8_ver_avx2(uvg_pixel* const dst, const uvg_pixel* const src, const uvg_pixel* const ref) +{ + const uvg_pixel* src_ptr = src; + const uvg_pixel* dst_ptr = dst; + + const __m256i zeros = _mm256_setzero_si256(); + const __m256i ones = _mm256_set1_epi8(1); + const __m256i twos = _mm256_set1_epi8(2); + const __m256i threes = _mm256_set1_epi8(3); + const __m256i fours = _mm256_set1_epi8(4); + const __m256i fives = _mm256_set1_epi8(5); + const __m256i sixes = _mm256_set1_epi8(6); + const __m256i sevens = _mm256_set1_epi8(7); + const __m256i eights = _mm256_set1_epi8(8); + + __m256i vbeforeleft = _mm256_load_si256((__m256i*)(ref + 0)); + __m256i vbeforeright = _mm256_load_si256((__m256i*)(ref + 32)); + + for (int i = 0; i < 8; ++i) { + __m256i vbehindleft = _mm256_load_si256((__m256i*)(src_ptr + 0)); + __m256i vbehindright = _mm256_load_si256((__m256i*)(src_ptr + 32)); + + // Calculate left side of 64 wide lane. + // Calculate the 7 interpolated lines between before and behind. Ordered by number from top to bottom. + __m256i vleft3 = _mm256_avg_epu8(vbeforeleft, vbehindleft); // Middle + __m256i vleft1 = _mm256_avg_epu8(vleft3, vbeforeleft); // Top middle + __m256i vleft5 = _mm256_avg_epu8(vleft3, vbehindleft); // Bottom middle + __m256i vleft0 = _mm256_avg_epu8(vbeforeleft, vleft1); // Top middle top + __m256i vleft2 = _mm256_avg_epu8(vleft1, vleft3); // Top middle bottom + __m256i vleft4 = _mm256_avg_epu8(vleft3, vleft5); // Bottom middle top + __m256i vleft6 = _mm256_avg_epu8(vleft5, vbehindleft); // Bottom middle bottom + + // Calculate the three and two last bits of difference between before and behind. These bits are used to determine if there will be rounding error. + __m256i diff = _mm256_sub_epi8(vbehindleft, vbeforeleft); + diff = _mm256_and_si256(diff, sevens); + __m256i three_diff = _mm256_and_si256(diff, threes); + + // Bottom side + __m256i mask = _mm256_cmpgt_epi8(diff, fours); // The rounding error mask will be generated based on the calculated last bits. 
+ __m256i sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 5, 6, 7 select one + vleft6 = _mm256_sub_epi8(vleft6, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, threes); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 3 or 7 select one + vleft5 = _mm256_sub_epi8(vleft5, sub_amount); + + __m256i is_two = _mm256_cmpeq_epi8(diff, twos); + __m256i is_five = _mm256_cmpeq_epi8(diff, fives); + mask = _mm256_or_si256(mask, is_two); + mask = _mm256_or_si256(mask, is_five); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 2, 3, 5, or 7 select one + vleft4 = _mm256_sub_epi8(vleft4, sub_amount); + + // Top side + diff = _mm256_blendv_epi8(diff, eights, _mm256_cmpeq_epi8(zeros, diff)); // Replace zeros with eights to enable using GT + mask = _mm256_cmpgt_epi8(diff, threes); + sub_amount = _mm256_blendv_epi8(ones, zeros, mask); // If greater than three select zero + vleft0 = _mm256_sub_epi8(vleft0, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, ones); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 1 or 5 select one + vleft1 = _mm256_sub_epi8(vleft1, sub_amount); + + __m256i is_three = _mm256_cmpeq_epi8(diff, threes); + __m256i is_six = _mm256_cmpeq_epi8(diff, sixes); + mask = _mm256_or_si256(mask, is_three); + mask = _mm256_or_si256(mask, is_six); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 1, 3, 5, 6 select one + vleft2 = _mm256_sub_epi8(vleft2, sub_amount); + + + // Calculate right side of 64 wide lane. + // Calculate the 7 interpolated lines between before and behind. Ordered by number from top to bottom. + __m256i vright3 = _mm256_avg_epu8(vbeforeright, vbehindright); // Middle + __m256i vright1 = _mm256_avg_epu8(vright3, vbeforeright); // Top middle + __m256i vright5 = _mm256_avg_epu8(vright3, vbehindright); // Bottom middle + __m256i vright0 = _mm256_avg_epu8(vbeforeright, vright1); // Top middle top + __m256i vright2 = _mm256_avg_epu8(vright1, vright3); // Top middle bottom + __m256i vright4 = _mm256_avg_epu8(vright3, vright5); // Bottom middle top + __m256i vright6 = _mm256_avg_epu8(vright5, vbehindright); // Bottom middle bottom + + // Calculate the three and two last bits of difference between before and behind. These bits are used to determine if there will be rounding error. + diff = _mm256_sub_epi8(vbehindright, vbeforeright); + diff = _mm256_and_si256(diff, sevens); + three_diff = _mm256_and_si256(diff, threes); + + // Bottom side + mask = _mm256_cmpgt_epi8(diff, fours); // The rounding error mask will be generated based on the calculated last bits. 
+ sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 5, 6, 7 select one + vright6 = _mm256_sub_epi8(vright6, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, threes); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 3 or 7 select one + vright5 = _mm256_sub_epi8(vright5, sub_amount); + + is_two = _mm256_cmpeq_epi8(diff, twos); + is_five = _mm256_cmpeq_epi8(diff, fives); + mask = _mm256_or_si256(mask, is_two); + mask = _mm256_or_si256(mask, is_five); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 2, 3, 5, or 7 select one + vright4 = _mm256_sub_epi8(vright4, sub_amount); + + // Top side + diff = _mm256_blendv_epi8(diff, eights, _mm256_cmpeq_epi8(zeros, diff)); // Replace zeros with eights to enable using GT + mask = _mm256_cmpgt_epi8(diff, threes); + sub_amount = _mm256_blendv_epi8(ones, zeros, mask); // If greater than three select zero + vright0 = _mm256_sub_epi8(vright0, sub_amount); + + mask = _mm256_cmpeq_epi8(three_diff, ones); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 1 or 5 select one + vright1 = _mm256_sub_epi8(vright1, sub_amount); + + is_three = _mm256_cmpeq_epi8(diff, threes); + is_six = _mm256_cmpeq_epi8(diff, sixes); + mask = _mm256_or_si256(mask, is_three); + mask = _mm256_or_si256(mask, is_six); + sub_amount = _mm256_blendv_epi8(zeros, ones, mask); // If 1, 3, 5, 6 select one + vright2 = _mm256_sub_epi8(vright2, sub_amount); + + + // Store results + _mm256_store_si256((__m256i*)(dst_ptr + 0), vleft0); + _mm256_store_si256((__m256i*)(dst_ptr + 32), vright0); + _mm256_store_si256((__m256i*)(dst_ptr + 64), vleft1); + _mm256_store_si256((__m256i*)(dst_ptr + 96), vright1); + _mm256_store_si256((__m256i*)(dst_ptr + 128), vleft2); + _mm256_store_si256((__m256i*)(dst_ptr + 160), vright2); + _mm256_store_si256((__m256i*)(dst_ptr + 192), vleft3); + _mm256_store_si256((__m256i*)(dst_ptr + 224), vright3); + _mm256_store_si256((__m256i*)(dst_ptr + 256), vleft4); + _mm256_store_si256((__m256i*)(dst_ptr + 288), vright4); + _mm256_store_si256((__m256i*)(dst_ptr + 320), vleft5); + _mm256_store_si256((__m256i*)(dst_ptr + 352), vright5); + _mm256_store_si256((__m256i*)(dst_ptr + 384), vleft6); + _mm256_store_si256((__m256i*)(dst_ptr + 416), vright6); + + vbeforeleft = vbehindleft; + vbeforeright = vbehindright; + + dst_ptr += 512; + src_ptr += 512; + } +} + +/** \brief Matrix weighted intra prediction. +*/ +static void mip_predict_avx2( + //const encoder_state_t* const state, + const uvg_intra_references* const refs, + const uint16_t pred_block_width, + const uint16_t pred_block_height, + uvg_pixel* dst, + const int mip_mode, + const bool mip_transp) +{ + // MIP prediction uses int values instead of uvg_pixel as some temp values may be negative + + //uvg_pixel* out = dst; + //uvg_pixel result[64 * 64] = { 0 }; + uvg_pixel* result = dst; + const int mode_idx = mip_mode; + + // *** INPUT PREP *** + + // Initialize prediction parameters START + uint16_t width = pred_block_width; + uint16_t height = pred_block_height; + + int size_id; // Prediction block type + if (width == 4 && height == 4) { + size_id = 0; + } + else if (width == 4 || height == 4 || (width == 8 && height == 8)) { + size_id = 1; + } + else { + size_id = 2; + } + + // Reduced boundary and prediction sizes + int red_bdry_size = (size_id == 0) ? 2 : 4; + int red_pred_size = (size_id < 2) ? 
4 : 8; + + // Upsampling factors + uint16_t ups_hor_factor = width / red_pred_size; + uint16_t ups_ver_factor = height / red_pred_size; + // Initialize prediction parameters END + + const uvg_pixel* ref_samples_top = &refs->ref.top[1]; + const uvg_pixel* ref_samples_left = &refs->ref.left[1]; + + // Compute reduced boundary with Haar-downsampling + const int input_size = 2 * red_bdry_size; + + uvg_pixel red_bdry[MIP_MAX_INPUT_SIZE]; + uvg_pixel red_bdry_trans[MIP_MAX_INPUT_SIZE]; + int16_t red_bdry16[MIP_MAX_INPUT_SIZE]; + int16_t red_bdry_trans16[MIP_MAX_INPUT_SIZE]; + + uvg_pixel* const top_reduced = &red_bdry[0]; + uvg_pixel* const left_reduced = &red_bdry[red_bdry_size]; + + if (width == 4 && height == 4) { + // 4 to 2 downsampling for both dimensions + mip_ref_downsampling_4x4_4to2_avx2(top_reduced, ref_samples_top, ref_samples_left); + } + else if (width == 8 && height == 8) { + // 8 to 4 downsampling for both dimensions + mip_ref_downsampling_8x8_8to4_avx2(top_reduced, ref_samples_top, ref_samples_left); + } + else { + // Horizontal downsampling + switch (width) { + case 4: + // 4x4 case handled elsewhere. + // No horizontal downsampling needed. Copy pixels. + memcpy(top_reduced, ref_samples_top, 4 * sizeof(uvg_pixel)); + break; + case 8: mip_ref_downsampling_1D_8to4_avx2(top_reduced, ref_samples_top); break; // 8x8 case handled elsewhere. + case 16: mip_ref_downsampling_1D_16to4_avx2(top_reduced, ref_samples_top); break; + case 32: mip_ref_downsampling_1D_32to4_avx2(top_reduced, ref_samples_top); break; + case 64: mip_ref_downsampling_1D_64to4_avx2(top_reduced, ref_samples_top); break; + default: + assert(false && "MIP horizontal downsampling. Invalid width.\n"); + break; + } + + // Vertical downsampling + switch (height) { + case 4: + // 4x4 case handled elsewhere. + // No vertical downsampling needed. Copy pixels. + memcpy(left_reduced, ref_samples_left, 4 * sizeof(uvg_pixel)); + break; + case 8: mip_ref_downsampling_1D_8to4_avx2(left_reduced, ref_samples_left); break; // 8x8 case handled elsewhere. + case 16: mip_ref_downsampling_1D_16to4_avx2(left_reduced, ref_samples_left); break; + case 32: mip_ref_downsampling_1D_32to4_avx2(left_reduced, ref_samples_left); break; + case 64: mip_ref_downsampling_1D_64to4_avx2(left_reduced, ref_samples_left); break; + default: + assert(false && "MIP vertical downsampling. Invalid height.\n"); + break; + } + } + + + // Transposed reduced boundaries + uvg_pixel* const left_reduced_trans = &red_bdry_trans[0]; + uvg_pixel* const top_reduced_trans = &red_bdry_trans[red_bdry_size]; + + for (int x = 0; x < red_bdry_size; x++) { + top_reduced_trans[x] = top_reduced[x]; + } + for (int y = 0; y < red_bdry_size; y++) { + left_reduced_trans[y] = left_reduced[y]; + } + + uvg_pixel input_offset = red_bdry[0]; + uvg_pixel input_offset_trans = red_bdry_trans[0]; + + const bool has_first_col = (size_id < 2); + // First column of matrix not needed for large blocks + // These can potentially fail with uvg_pixel + red_bdry16[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset) : 0; + red_bdry_trans16[0] = has_first_col ? 
((1 << (UVG_BIT_DEPTH - 1)) - input_offset_trans) : 0; + + // This fails with uvg_pixel, here at least int16_t is needed + for (int i = 1; i < input_size; ++i) { + red_bdry16[i] = red_bdry[i] - input_offset; + red_bdry_trans16[i] = red_bdry_trans[i] - input_offset_trans; + } + + // *** INPUT PREP *** END + + // *** BLOCK PREDICT *** + + const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1); + const bool transpose = mip_transp; + + const uint8_t* matrix = 0; + const uint16_t* matrix16 = 0; + switch (size_id) { + case 0: + matrix16 = &uvg_mip_sid0_weights[mode_idx][0][0]; + break; + case 1: + matrix16 = &uvg_mip_sid1_weights[mode_idx * 128]; + break; + case 2: + //matrix = &uvg_mip_matrix_16x16[mode_idx][0][0]; + matrix16 = &uvg_mip_sid2_weights[mode_idx * 512]; + break; + default: + assert(false && "Invalid MIP size id."); + } + + // Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8 + uvg_pixel red_pred_buffer[8 * 8]; + uvg_pixel* const reduced_pred = need_upsampling ? red_pred_buffer : result; + + const uvg_pixel* const reduced_bdry = transpose ? red_bdry_trans : red_bdry; + const int16_t* const reduced_bdry16 = transpose ? red_bdry_trans16 : red_bdry16; + + switch (size_id) { + case 0: mip_reduced_pred_sid0_avx2(reduced_pred, reduced_bdry16, matrix16, transpose, input_offset, input_offset_trans); break; + case 1: mip_reduced_pred_sid1_avx2(reduced_pred, reduced_bdry16, matrix16, transpose, input_offset, input_offset_trans); break; + case 2: mip_reduced_pred_sid2_avx2(reduced_pred, reduced_bdry16, matrix16, transpose, input_offset, input_offset_trans); break; + default: + assert(false && "Intra MIP: invalid size id.\n"); + break; + } + if (need_upsampling) { + const uvg_pixel* ver_src = reduced_pred; + uint16_t ver_src_step = width; + + if (ups_hor_factor > 1) { + uvg_pixel* const hor_dst = result + (ups_ver_factor - 1) * width; + ver_src = hor_dst; + ver_src_step *= ups_ver_factor; + + switch (width) { + // Case 4 does not exist. There is no need for horizontal upsampling when width is 4. + case 8: + // This will only get called for 8x8 blocks. + mip_upsampling_w8_ups2_hor_avx2(hor_dst, reduced_pred, ref_samples_left, ver_src_step, ups_ver_factor); + break; + case 16: + if (red_pred_size == 4) { + mip_upsampling_w16_ups4_hor_avx2(hor_dst, reduced_pred, ref_samples_left, ver_src_step, ups_ver_factor); + } + else { + mip_upsampling_w16_ups2_hor_avx2(hor_dst, reduced_pred, ref_samples_left, ver_src_step, ups_ver_factor); // Works for height 8, 16, 32 and 64. Upsamples 1 to 2. + } + break; + case 32: + if (red_pred_size == 4) { + mip_upsampling_w32_ups8_hor_avx2(hor_dst, reduced_pred, ref_samples_left, ver_src_step, ups_ver_factor); + } + else { + mip_upsampling_w32_ups4_hor_avx2(hor_dst, reduced_pred, ref_samples_left, ver_src_step, ups_ver_factor); // Works for height 8, 16, 32 and 64. Upsamples 1 to 4. + } + break; + case 64: + mip_upsampling_w64_ups8_hor_avx2(hor_dst, reduced_pred, ref_samples_left, ver_src_step, ups_ver_factor); // Works for height 8, 16, 32 and 64. Upsamples 1 to 8. 
+ break; + default: + assert(false && "Invalid MIP width.\n"); + break; + } + } + + if (ups_ver_factor > 1) { + switch (width) { + case 4: + if (ups_ver_factor == 2) { + mip_upsampling_w4_ups2_ver_avx2(result, ver_src, ref_samples_top); + } + else if (ups_ver_factor == 4) { + mip_upsampling_w4_ups4_ver_avx2(result, ver_src, ref_samples_top); + } + else { + mip_upsampling_w4_ups8_ver_avx2(result, ver_src, ref_samples_top); + } + break; + + case 8: + if (ups_ver_factor == 2) { + if (height == 8) { + mip_upsampling_w8_ups2_h8_ver_avx2(result, ver_src, ref_samples_top); + } + else { // Height == 16 + mip_upsampling_w8_ups2_h16_ver_avx2(result, ver_src, ref_samples_top); + } + } + else if (ups_ver_factor == 4) { + mip_upsampling_w8_ups4_ver_avx2(result, ver_src, ref_samples_top); + } + else { + mip_upsampling_w8_ups8_ver_avx2(result, ver_src, ref_samples_top); + } + break; + + case 16: + if (ups_ver_factor == 2) { + mip_upsampling_w16_ups2_ver_avx2(result, ver_src, ref_samples_top); + } + else if (ups_ver_factor == 4) { + mip_upsampling_w16_ups4_ver_avx2(result, ver_src, ref_samples_top); + } + else { + mip_upsampling_w16_ups8_ver_avx2(result, ver_src, ref_samples_top); + } + break; + + case 32: + if (ups_ver_factor == 2) { + mip_upsampling_w32_ups2_ver_avx2(result, ver_src, ref_samples_top); + } + else if (ups_ver_factor == 4) { + mip_upsampling_w32_ups4_ver_avx2(result, ver_src, ref_samples_top); + } + else { + mip_upsampling_w32_ups8_ver_avx2(result, ver_src, ref_samples_top); + } + break; + + case 64: + if (ups_ver_factor == 2) { + mip_upsampling_w64_ups2_ver_avx2(result, ver_src, ref_samples_top); + } + else if (ups_ver_factor == 4) { + mip_upsampling_w64_ups4_ver_avx2(result, ver_src, ref_samples_top); + } + else { + mip_upsampling_w64_ups8_ver_avx2(result, ver_src, ref_samples_top); + } + break; + + default: + assert(false && "Invalid MIP width.\n"); + break; + } + } + } + // *** BLOCK PREDICT *** END +} + + +#endif // UVG_BIT_DEPTH == 8 + +#endif // COMPILE_INTEL_AVX2 && defined X86_64 int uvg_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) { @@ -1077,8 +6666,8 @@ int uvg_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) if (bitdepth == 8) { success &= uvg_strategyselector_register(opaque, "angular_pred", "avx2", 40, &uvg_angular_pred_avx2); success &= uvg_strategyselector_register(opaque, "intra_pred_planar", "avx2", 40, &uvg_intra_pred_planar_avx2); - success &= uvg_strategyselector_register(opaque, "intra_pred_filtered_dc", "avx2", 40, &uvg_intra_pred_filtered_dc_avx2); success &= uvg_strategyselector_register(opaque, "pdpc_planar_dc", "avx2", 40, &uvg_pdpc_planar_dc_avx2); + success &= uvg_strategyselector_register(opaque, "mip_predict", "avx2", 40, &mip_predict_avx2); } #endif //UVG_BIT_DEPTH == 8 #endif //COMPILE_INTEL_AVX2 && defined X86_64 diff --git a/src/strategies/avx2/intra_avx2_tables.h b/src/strategies/avx2/intra_avx2_tables.h new file mode 100644 index 00000000..df0ed5a3 --- /dev/null +++ b/src/strategies/avx2/intra_avx2_tables.h @@ -0,0 +1,3745 @@ +#ifndef INTRA_AVX2_TABLES_H +#define INTRA_AVX2_TABLES_H + +#include "global.h" + +// Used for calculating table offsets for each prediction mode. +// If row length changes, this value must be updated. +// Remember to use values divisible by 16 to ensure tables stay aligned to 32 bytes. +#define DELTA_TABLE_ROW_LENGTH 80 + +// The number of unique 128-bit coefficient vectors for a given prediction mode. Applicable for width 4 chroma linear interpolation. 
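+// These tables are presumably indexed with (pred_mode - 2), one entry per angular mode
+// 2..34 (the _wide_angle variant covers the wide-angle modes). A count of 1 means the
+// fractional sample offsets repeat within a single 128-bit vector, so one coefficient
+// vector serves the whole block (e.g. the pure diagonal mode 2); larger counts mean the
+// weights only repeat after that many vectors.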
+static ALIGNED(32) const int8_t coeff_vector128_num_by_mode[33] = { + 1, 16, 8, 16, 4, 8, 1, 8, 4, 8, 2, 8, 4, 16, 8, 16, + 1, 16, 8, 16, 4, 8, 2, 8, 4, 8, 1, 8, 4, 16, 8, 16, 1 +}; + +static ALIGNED(32) const int8_t coeff_vector128_num_by_mode_wide_angle[14] = { + 1, 16, 1, 16, 1, 8, 8, 16, 1, 16, 16, 16, 16, 16 +}; + + +static ALIGNED(32) const int16_t coeff_table_mode_offsets[33] = { + 0, 16, 272, 400, 656, 720, 848, 864, 992, 1056, 1184, 1216, 1344, 1408, 1664, 1792, + 2048, 2064, 2320, 2448, 2704, 2768, 2896, 2928, 3056, 3120, 3248, 3264, 3392, 3456, 3712, 3840, 4096 +}; + +static ALIGNED(32) const int16_t mode_to_weight_table_offset_w4_hor[35] = { + 0, 0, 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496, 512 +}; + +static ALIGNED(32) const int16_t mode_to_shuffle_vector_table_offset_w4_hor[35] = { + 0, 0, 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024 +}; + + +// Index with (mode - 2) * 8 + (y >> 2). The given index will point to the correct place in shuffle vector table. +static ALIGNED(32) const int16_t intra_chroma_linear_interpolation_w4_ver_shuffle_vector_offset[] = { + 0, 0, 0, 0, 0, 0, 0, 0, // Mode 2 + 0, 0, 32, 0, 0, 64, 0, 0, // Mode 3 + 0, 64, 32, 0, 0, 64, 32, 0, // Mode 4 + 96, 96, 32, 32, 64, 64, 0, 0, // Mode 5 + 32, 64, 32, 64, 32, 64, 32, 64, // Mode 6 + 32, 32, 128, 64, 32, 32, 128, 64, // Mode 7 + 32, 32, 32, 32, 32, 32, 32, 32, // Mode 8 + 128, 128, 32, 32, 128, 128, 32, 32, // Mode 9 + 128, 32, 128, 32, 128, 32, 128, 32, // Mode 10 + 160, 128, 192, 160, 160, 128, 192, 160, // Mode 11 + 160, 160, 160, 160, 160, 160, 160, 160, // Mode 12 + 224, 192, 128, 160, 224, 192, 128, 160, // Mode 13 + 224, 160, 224, 160, 224, 160, 224, 160, // Mode 14 + 224, 224, 128, 224, 224, 192, 224, 160, // Mode 15 + 224, 224, 224, 160, 224, 224, 224, 160, // Mode 16 + 224, 224, 224, 224, 224, 224, 224, 160, // Mode 17 + 224, 224, 224, 224, 224, 224, 224, 224, // Mode 18 + 224, 224, 224, 224, 224, 224, 224, 224, // Mode 19 + 224, 224, 224, 224, 224, 224, 224, 224, // Mode 20 + 224, 224, 256, 224, 224, 288, 224, 224, // Mode 21 + 224, 224, 224, 224, 224, 224, 224, 224, // Mode 22 + 224, 288, 256, 224, 224, 288, 256, 224, // Mode 23 + 224, 224, 224, 224, 224, 224, 224, 224, // Mode 24 + 320, 256, 288, 224, 320, 256, 288, 224, // Mode 25 + 256, 288, 256, 288, 256, 288, 256, 288, // Mode 26 + 256, 256, 352, 288, 256, 256, 352, 288, // Mode 27 + 256, 256, 256, 256, 256, 256, 256, 256, // Mode 28 + 352, 352, 256, 256, 352, 352, 256, 256, // Mode 29 + 352, 256, 352, 256, 352, 256, 352, 256, // Mode 30 + 384, 384, 352, 352, 416, 416, 448, 384, // Mode 31 + 448, 416, 352, 384, 448, 416, 352, 384, // Mode 32 + 448, 448, 352, 448, 448, 416, 448, 384, // Mode 33 + 448, 448, 448, 448, 448, 448, 448, 448 // Mode 34 +}; + + +// Shuffle vectors for w4 vertical. This is indexed based on the shape of delta int table for each mode. 
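+// Worked example of the indexing above: mode 5 at y = 12 gives index
+// (5 - 2) * 8 + (12 >> 2) = 27, and intra_chroma_linear_interpolation_w4_ver_shuffle_vector_offset[27]
+// is 32, i.e. the second 32-byte row in the table below (delta int shape [0, 1, 1, 2]).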
+static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_shuffle_vectors_w4_ver[] = { // Shape of the delta int table in sets of four + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // [0, 1, 2, 3] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // [0, 1, 1, 2] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // [0, 0, 1, 2] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // [0, 1, 2, 2] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, // [0, 0, 1, 1] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, // [0, 0, 0, 1] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, // [0, 1, 1, 1] + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [0, 0, 0, 0] + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [1, 1, 0, 0] + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [1, 0, 0, 0] + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [1, 1, 1, 0] + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [2, 1, 1, 0] + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [2, 1, 0, 0] + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [2, 2, 1, 0] + 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // [3, 2, 1, 0] +}; + + +// NOTE: shuffle vectors for w8, w16, and w32 vertical do not exist as they are not needed.
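+// A minimal usage sketch, not the actual kernel (the real consumer lives in
+// intra_avx2.c and may differ): it shows how the offset table above picks one
+// 32-byte shuffle row for a given mode and output row y, and how that row
+// gathers the (ref[x], ref[x + 1]) byte pairs that a maddubs-style pair-wise
+// weighting step would consume for a width-4 block. The function name and the
+// 'ref' argument are illustrative assumptions; the two tables and the
+// (mode - 2) * 8 + (y >> 2) indexing come from this header.
+#include <immintrin.h> // for the sketch only; assumes 8-bit uvg_pixel
+static inline __m256i example_gather_w4_ver_pairs(const uvg_pixel* ref, int mode, int y)
+{
+  // Offsets are multiples of 32 into an ALIGNED(32) table, so an aligned load is safe.
+  const int16_t offset = intra_chroma_linear_interpolation_w4_ver_shuffle_vector_offset[(mode - 2) * 8 + (y >> 2)];
+  const __m256i vshuf = _mm256_load_si256((const __m256i*)&intra_chroma_linear_interpolation_shuffle_vectors_w4_ver[offset]);
+  // vpshufb shuffles within each 128-bit lane, so broadcast the 16 source bytes to both lanes.
+  const __m256i vref = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)ref));
+  // Result: 16 (p0, p1) sample pairs covering four consecutive rows of the block.
+  return _mm256_shuffle_epi8(vref, vshuf);
+}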
+ + +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_shuffle_vectors_w4_hor[] = { + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // Mode 2 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, + 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // Mode 3 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, + 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, // Mode 4 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, + 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, // Mode 5 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, + 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0b, 0x0c, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 6 + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, + 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, 0x0b, 0x0c, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 7 + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, + 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, 0x0b, 0x0c, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 8 + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, + 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, 0x0b, 0x0c, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, // Mode 9 + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, + 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x09, 0x0a, + 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, // Mode 10 + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, + 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x09, 0x0a, + 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, // Mode 11 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, // Mode 12 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 13 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 14 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 15 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 16 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 17 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 18 + 0x01, 0x02, 0x01, 0x02, 
0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 19 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 20 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 21 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 22 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 23 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 24 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, // Mode 25 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, + 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 26 + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, + 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 27 + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, + 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 28 + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, + 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, // Mode 29 + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, + 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, + 0x0b, 0x0c, 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, // Mode 30 + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, + 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, + 0x0b, 0x0c, 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, + 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 31 + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, + 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 32 + 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, + 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 33 + 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, + 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 34 + 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, + 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a +}; + + +static ALIGNED(32) const int8_t 
intra_chroma_linear_interpolation_shuffle_vectors_w8_hor[] = { + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // Mode 2 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // Mode 3 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 4 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // Mode 5 + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // Mode 6 + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 7 + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 8 + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, // Mode 9 + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, // Mode 10 + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, // Mode 11 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 12 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 13 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, // Mode 14 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 15 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 16 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 17 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 18 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 
0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 19 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 20 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 21 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 22 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 23 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 24 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 25 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 26 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 27 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 28 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, // Mode 29 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 30 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 31 + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 32 + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 33 + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 34 + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, +}; + + +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_shuffle_vectors_w16_hor[] = { + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // Mode 2 + 
0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // Mode 3 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 4 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // Mode 5 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // Mode 6 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 7 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 8 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, // Mode 9 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, // Mode 10 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 
0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, // Mode 11 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, + 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 12 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 13 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, // Mode 14 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 15 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 16 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 17 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 18 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 19 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 
0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 20 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 21 + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 22 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 23 + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 24 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 25 + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 26 + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 27 + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 28 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 
0x02, + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, // Mode 29 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 30 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 31 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 32 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 33 + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 34 + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, +}; + + +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_shuffle_vectors_w32_hor[] = { + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, // Mode 2 + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, // Mode 3 + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x08, 
0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, // Mode 4 + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0a, 0x0b, 0x0b, 0x0c, // Mode 5 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, // Mode 6 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, // Mode 7 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, // Mode 8 + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, // Mode 9 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, // Mode 10 + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // Mode 11 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 12 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 
0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, // Mode 13 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 14 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 15 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, // Mode 16 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 17 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 18 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 19 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 20 + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 21 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 
0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 22 + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 23 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 24 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 25 + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 26 + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, // Mode 27 + 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 28 + 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x08, 0x09, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 29 + 0x08, 0x09, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 30 + 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x0b, 0x0c, 0x0a, 0x0b, 
0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, // Mode 31 + 0x0a, 0x0b, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, // Mode 32 + 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x0e, 0x0f, 0x0d, 0x0e, 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 33 + 0x0d, 0x0e, 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, + 0x0f, 0x10, 0x0e, 0x0f, 0x0d, 0x0e, 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, // Mode 34 + 0x0f, 0x10, 0x0e, 0x0f, 0x0d, 0x0e, 0x0c, 0x0d, 0x0b, 0x0c, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, +}; + +// Intra interpolation shuffle vectors for luma w4 horizontal. Includes wide angle modes [-12, 1]. Wide angle numbering goes from -12 to 1 since planar and DC (0, 1) are not considered angular modes. +static ALIGNED(32) const int8_t intra_luma_interpolation_shuffle_vectors_w4_hor[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -12 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -11 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -10 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -9 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -8 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x01, 0x02, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0c, 0x0d, // Mode -7 mrl 0 + 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0c, 0x0d, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x0d, 0x0e, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x0e, 0x0f, + 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x01, 0x02, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0c, 0x0d, // mrl 1 + 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0c, 0x0d, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x0d, 0x0e, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x0e, 0x0f, + 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x0a, 0x0b, 0x01, 0x02, 0x04, 0x05, 0x07, 0x08, 0x0b, 0x0c, 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0c, 0x0d, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0d, 0x0e, // mrl 2 + 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0c, 0x0d, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0d, 0x0e, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x0e, 0x0f, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x0f, 0x10, + 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x08, 0x09, 0x01, 0x02, 0x04, 0x05, 0x07, 0x08, 0x09, 0x0a, 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0c, // Mode -6 mrl 0 + 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0c, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x0c, 0x0d, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x0d, 0x0e, + 0x00, 0x01, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x01, 0x02, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x0a, 0x0b, 0x03, 0x04, 0x06, 0x07, 0x08, 0x09, 0x0b, 0x0c, // mrl 1 + 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x0a, 0x0b, 0x03, 0x04, 0x06, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x04, 0x05, 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, + 0x00, 0x01, 0x02, 0x03, 0x05, 0x06, 0x08, 0x09, 0x01, 0x02, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x0a, 0x0b, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0b, 0x0c, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x0c, 0x0d, 0x05, 0x06, 0x07, 0x08, 0x0a, 0x0b, 0x0d, 0x0e, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0a, 0x0b, // Mode -5 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0a, 0x0b, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x01, 0x02, 
0x03, 0x04, 0x06, 0x07, 0x08, 0x09, 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x09, 0x0a, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x09, 0x0a, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0c, 0x05, 0x06, 0x07, 0x08, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x01, 0x02, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x03, 0x04, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // mrl 2 + 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x03, 0x04, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x04, 0x05, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, // Mode -4 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, // Mode -3 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, // Mode -2 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 
0x02, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, // Mode -1 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 0 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 1 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 
0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 2 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 3 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 4 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 
0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // Mode 5 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // Mode 6 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // Mode 7 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 
0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // Mode 8 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // Mode 9 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // Mode 10 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 
0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 11 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 12 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 13 mrl 0 + 
0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 14 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 15 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 
0x03, 0x04, 0x03, 0x04, // Mode 16 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 17 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 18 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 
0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 19 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 20 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 21 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 
0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 22 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 23 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // Mode 24 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 
0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // Mode 25 mrl 0 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // Mode 26 mrl 0 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // Mode 27 mrl 0 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 
0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // Mode 28 mrl 0 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // Mode 29 mrl 0 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 2 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // Mode 30 mrl 0 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x07, 
0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // Mode 31 mrl 0 + 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 2 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // Mode 32 mrl 0 + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, + 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // Mode 33 mrl 0 + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 1 + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, + 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 2 + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 
0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06,
+ 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // Mode 34 mrl 0
+ 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06,
+ 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 1
+ 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06,
+ 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, // mrl 2
+ 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06,
+};
+
+// Intra interpolation shuffle vectors for luma w8 horizontal.
+static ALIGNED(32) const int8_t intra_luma_interpolation_shuffle_vectors_w8_hor[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -12 | not used
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -11 | not used
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -10 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -9 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -8 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -7 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -6 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -5 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -4 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode -3 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0b, 0x0c, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, // Mode -2 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0d, 0x0e, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0d, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, 0x0b, 0x0c, 0x01, 
0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0d, 0x0d, 0x0e, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, // Mode -1 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0d, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, + 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x0a, 0x0b, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, 0x0b, 0x0c, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0d, 0x0d, 0x0e, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0d, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, // Mode 0 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, + 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x09, 0x0a, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0c, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // Mode 1 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 
0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // Mode 2 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // Mode 3 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // Mode 4 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x01, 
0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, // Mode 5 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, // Mode 6 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, 0x08, 0x09, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // Mode 7 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 
0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x07, 0x08, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // Mode 8 mrl 0 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // Mode 9 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode 10 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x04, 
0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, // Mode 11 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, // Mode 12 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, // Mode 13 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 
0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, // Mode 14 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 15 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 16 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 
0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 17 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 18 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 19 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 
0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 20 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 21 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 22 mrl 0 + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 
0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 23 mrl 0 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 24 mrl 0 + 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // Mode 25 mrl 0 + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x03, 
0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 2 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // Mode 26 mrl 0 + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x05, 0x06, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // Mode 27 mrl 0 + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, // mrl 1 + 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 2 + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // Mode 28 mrl 0 + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 
0x01, 0x02, 0x00, 0x01, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // Mode 29 mrl 0 + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // mrl 2 + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // Mode 30 mrl 0 + 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 2 + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x00, 0x01, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, // Mode 31 mrl 0 + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, + 0x05, 0x06, 0x04, 0x05, 0x04, 
0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02, 0x00, 0x01, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x08, 0x09, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, + 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 2 + 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x08, 0x09, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // Mode 32 mrl 0 + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 2 + 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // Mode 33 mrl 0 + 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 1 + 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 2 + 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, + 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // Mode 34 mrl 0 + 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 
0x03, 0x04,
+ 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 1
+ 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04,
+ 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, 0x00, 0x01, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x01, 0x02, // mrl 2
+ 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04, 0x02, 0x03, 0x0a, 0x0b, 0x09, 0x0a, 0x08, 0x09, 0x07, 0x08, 0x06, 0x07, 0x05, 0x06, 0x04, 0x05, 0x03, 0x04,
+};
+
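Each 32-byte row of these shuffle tables is selected per intra mode and multi-reference-line (mrl) index, as the row comments indicate, and is meant to be fed directly to a byte shuffle. Below is a minimal illustrative sketch of that use, assuming AVX2; the helper name gather_4tap_sources and its parameters are hypothetical and not part of this patch. In the w64 table declared next, each group of four offsets in a row picks the four reference samples feeding one 4-tap interpolation, and the offsets shown stay within 0..15, matching the per-128-bit-lane indexing of _mm256_shuffle_epi8.

#include <immintrin.h>
#include <stdint.h>

// Illustrative sketch only, not part of this patch: apply one 32-byte row
// of a shuffle table to a window of reference samples.
static inline __m256i gather_4tap_sources(const uint8_t *ref_samples,
                                          const int8_t *shuffle_row)
{
  // Duplicate the same 16 reference bytes into both 128-bit lanes, since
  // _mm256_shuffle_epi8 indexes each 128-bit lane independently.
  const __m256i v_ref = _mm256_broadcastsi128_si256(
      _mm_loadu_si128((const __m128i *)ref_samples));
  // The tables are declared ALIGNED(32), so an aligned load is valid.
  const __m256i v_shuf = _mm256_load_si256((const __m256i *)shuffle_row);
  // Afterwards the 4 taps of each output pixel sit in adjacent bytes, ready
  // for e.g. _mm256_maddubs_epi16 against the interpolation coefficients.
  return _mm256_shuffle_epi8(v_ref, v_shuf);
}

Precomputing the offsets this way trades table storage for per-pixel gather logic in the hot loop, and narrower widths can reuse prefixes of the same rows, which is presumably why the comment below notes the w16 and w32 functions can share this table.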
+// Intra interpolation shuffle vectors for luma w64 horizontal.
+// w16 and w32 functions can also use values in this table.
+static ALIGNED(32) const int8_t intra_luma_interpolation_shuffle_vectors_w64_hor[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, // Mode 5 mrl 0
+ 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08,
+ 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, // mrl 1
+ 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08,
+ 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08,
+ 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, // mrl 2
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, // Mode 6 mrl 0
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08,
+ 0x05, 0x06, 0x07,
0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, // mrl 1 + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, // mrl 2 + 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, // Mode 7 mrl 0 + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 
0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, // mrl 1 + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, // mrl 2 + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 
0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x09, 0x0a, 0x0b, 0x0c, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, // Mode 8 mrl 0 + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // mrl 1 + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 
0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, // mrl 2 + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x08, 0x09, 0x0a, 0x0b, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // Mode 9 mrl 0 + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 
0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // mrl 1 + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, // mrl 2 + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, // Mode 10 mrl 0 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 
0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // mrl 1 + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 2 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 
0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // Mode 11 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 1 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 
0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, // mrl 2 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, // Mode 12 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 
0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // Mode 13 mrl 0 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 
0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 1 + 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 
0x01, 0x02, 0x03, 0x04, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, // Mode 14 mrl 0 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 1 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 
0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 2 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // Mode 15 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 1 + 0x00, 0x01, 0x02, 0x03, 0x01, 
0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 2 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // Mode 16 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 
0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 1 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 2 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 
0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // Mode 17 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 1 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 
0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 2 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // Mode 18 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 1 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 
0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 2 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // Mode 19 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 
0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 1 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // mrl 2 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 
0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, // Mode 20 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 1 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 
0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 2 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // Mode 21 mrl 0 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 1 + 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 
0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 2 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // Mode 22 mrl 0 + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 
0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, // mrl 1 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // mrl 2 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 
0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, // Mode 23 mrl 0 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 2 + 0x01, 0x02, 0x03, 
0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // Mode 24 mrl 0 + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 
0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, // Mode 25 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 
0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // mrl 1 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 
0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // Mode 26 mrl 0 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, // mrl 1 + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // mrl 2 + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 
0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, // Mode 27 mrl 0 + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, // mrl 1 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 
+ 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, // mrl 2 + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, + 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, // Mode 28 mrl 0 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 
0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, // mrl 1 + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, // mrl 2 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 
0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, // Mode 29 mrl 0 + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, // mrl 1 + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, // mrl 2 + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 
0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, // Mode 30 mrl 0 + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, // mrl 1 + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 
0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, // mrl 2 + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, // Mode 31 mrl 0 + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 
0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, // mrl 1 + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, // mrl 2 + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, // Mode 32 mrl 0 + 
0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, // mrl 1 + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x08, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, // mrl 2 + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 
0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x0c, 0x0d, 0x0e, 0x0f, 0x0b, 0x0c, 0x0d, 0x0e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0a, 0x0b, 0x0c, 0x0d, 0x09, 0x0a, 0x0b, 0x0c, 0x08, 0x09, 0x0a, 0x0b, 0x07, 0x08, 0x09, 0x0a, 0x06, 0x07, 0x08, 0x09, + 0x06, 0x07, 0x08, 0x09, 0x05, 0x06, 0x07, 0x08, 0x04, 0x05, 0x06, 0x07, 0x03, 0x04, 0x05, 0x06, 0x02, 0x03, 0x04, 0x05, 0x02, 0x03, 0x04, 0x05, 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x02, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode 33 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Mode 34 | not used + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + + +// Chroma linear interpolation filter weights for width 8, vertical modes. These also work for w16 and w32. 
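+// (Illustrative note, assuming the usual 32-phase two-tap chroma filter:
+// each (w0, w1) pair below sums to 32, so a predicted sample would be a
+// fixed-point blend of two adjacent reference samples, e.g.
+//   pred = (w0 * ref[x] + w1 * ref[x + 1] + 16) >> 5;
+// i.e. linear interpolation with 5 fractional bits and round-to-nearest.)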
+static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w8_ver[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 2 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, 27, 5, 30, 2, 1, 31, 4, 28, 7, 25, 10, 22, 13, 19, 16, 16, 19, 13, 22, 10, 25, 7, 28, 4, 31, 1, 2, 30, 5, 27, 8, 24, 11, 21, 14, 18, 17, 15, 20, 12, 23, 9, 26, 6, 29, 3, 32, 0, // Mode 3 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, // Mode 4 + 9, 23, 18, 14, 27, 5, 4, 28, 13, 19, 22, 10, 31, 1, 8, 24, 17, 15, 26, 6, 3, 29, 12, 20, 21, 11, 30, 2, 7, 25, 16, 16, 25, 7, 2, 30, 11, 21, 20, 12, 29, 3, 6, 26, 15, 17, 24, 8, 1, 31, 10, 22, 19, 13, 28, 4, 5, 27, 14, 18, 23, 9, 32, 0, // Mode 5 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 6 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, // Mode 7 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 8 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, // Mode 9 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 10 + 22, 10, 12, 20, 2, 30, 24, 8, 14, 18, 4, 28, 26, 6, 16, 16, 6, 26, 28, 4, 18, 14, 8, 24, 30, 2, 20, 12, 10, 22, 32, 0, 22, 10, 12, 20, 2, 30, 24, 8, 14, 18, 4, 28, 26, 6, 16, 16, 6, 26, 28, 4, 18, 14, 8, 24, 30, 2, 20, 12, 10, 22, 32, 0, // Mode 11 + 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, // Mode 12 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode 13 + 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, // Mode 14 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, 13, 19, 10, 22, 7, 25, 4, 28, 1, 31, 30, 2, 27, 5, 24, 8, 21, 11, 18, 14, 15, 17, 12, 20, 9, 23, 6, 26, 3, 29, 32, 
0, // Mode 15 + 30, 2, 28, 4, 26, 6, 24, 8, 22, 10, 20, 12, 18, 14, 16, 16, 14, 18, 12, 20, 10, 22, 8, 24, 6, 26, 4, 28, 2, 30, 32, 0, 30, 2, 28, 4, 26, 6, 24, 8, 22, 10, 20, 12, 18, 14, 16, 16, 14, 18, 12, 20, 10, 22, 8, 24, 6, 26, 4, 28, 2, 30, 32, 0, // Mode 16 + 31, 1, 30, 2, 29, 3, 28, 4, 27, 5, 26, 6, 25, 7, 24, 8, 23, 9, 22, 10, 21, 11, 20, 12, 19, 13, 18, 14, 17, 15, 16, 16, 15, 17, 14, 18, 13, 19, 12, 20, 11, 21, 10, 22, 9, 23, 8, 24, 7, 25, 6, 26, 5, 27, 4, 28, 3, 29, 2, 30, 1, 31, 32, 0, // Mode 17 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 18 + 1, 31, 2, 30, 3, 29, 4, 28, 5, 27, 6, 26, 7, 25, 8, 24, 9, 23, 10, 22, 11, 21, 12, 20, 13, 19, 14, 18, 15, 17, 16, 16, 17, 15, 18, 14, 19, 13, 20, 12, 21, 11, 22, 10, 23, 9, 24, 8, 25, 7, 26, 6, 27, 5, 28, 4, 29, 3, 30, 2, 31, 1, 32, 0, // Mode 19 + 2, 30, 4, 28, 6, 26, 8, 24, 10, 22, 12, 20, 14, 18, 16, 16, 18, 14, 20, 12, 22, 10, 24, 8, 26, 6, 28, 4, 30, 2, 32, 0, 2, 30, 4, 28, 6, 26, 8, 24, 10, 22, 12, 20, 14, 18, 16, 16, 18, 14, 20, 12, 22, 10, 24, 8, 26, 6, 28, 4, 30, 2, 32, 0, // Mode 20 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, 27, 5, 30, 2, 1, 31, 4, 28, 7, 25, 10, 22, 13, 19, 16, 16, 19, 13, 22, 10, 25, 7, 28, 4, 31, 1, 2, 30, 5, 27, 8, 24, 11, 21, 14, 18, 17, 15, 20, 12, 23, 9, 26, 6, 29, 3, 32, 0, // Mode 21 + 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, // Mode 22 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, // Mode 23 + 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, // Mode 24 + 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, // Mode 25 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 26 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, // Mode 27 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 28 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, // Mode 29 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 
20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 30 + 23, 9, 14, 18, 5, 27, 28, 4, 19, 13, 10, 22, 1, 31, 24, 8, 15, 17, 6, 26, 29, 3, 20, 12, 11, 21, 2, 30, 25, 7, 16, 16, 7, 25, 30, 2, 21, 11, 12, 20, 3, 29, 26, 6, 17, 15, 8, 24, 31, 1, 22, 10, 13, 19, 4, 28, 27, 5, 18, 14, 9, 23, 32, 0, // Mode 31 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode 32 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, 13, 19, 10, 22, 7, 25, 4, 28, 1, 31, 30, 2, 27, 5, 24, 8, 21, 11, 18, 14, 15, 17, 12, 20, 9, 23, 6, 26, 3, 29, 32, 0, // Mode 33 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 34 +}; + +// Chroma linear interpolation filter weights for width 8, vertical wide angle modes. These also work for w16 and w32. +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w8_ver_wide_angle[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -12 Offset 0 + 11, 21, 22, 10, 1, 31, 12, 20, 23, 9, 2, 30, 13, 19, 24, 8, 3, 29, 14, 18, 25, 7, 4, 28, 15, 17, 26, 6, 5, 27, 16, 16, 27, 5, 6, 26, 17, 15, 28, 4, 7, 25, 18, 14, 29, 3, 8, 24, 19, 13, 30, 2, 9, 23, 20, 12, 31, 1, 10, 22, 21, 11, 32, 0, // Mode -11 Offset 64 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -10 Offset 128 + 21, 11, 10, 22, 31, 1, 20, 12, 9, 23, 30, 2, 19, 13, 8, 24, 29, 3, 18, 14, 7, 25, 28, 4, 17, 15, 6, 26, 27, 5, 16, 16, 5, 27, 26, 6, 15, 17, 4, 28, 25, 7, 14, 18, 3, 29, 24, 8, 13, 19, 2, 30, 23, 9, 12, 20, 1, 31, 22, 10, 11, 21, 32, 0, // Mode -9 Offset 192 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -8 Offset 256 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode -7 Offset 320 + 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, // Mode -6 Offset 384 + 23, 9, 14, 18, 5, 27, 28, 4, 19, 13, 10, 22, 1, 31, 24, 8, 15, 17, 6, 26, 29, 3, 20, 12, 11, 21, 2, 30, 25, 7, 16, 16, 7, 25, 30, 2, 21, 11, 12, 20, 3, 29, 26, 6, 17, 15, 8, 24, 31, 1, 22, 10, 13, 19, 4, 28, 27, 5, 18, 14, 9, 23, 32, 0, // Mode -5 Offset 448 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 
0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -4 Offset 512 + 7, 25, 14, 18, 21, 11, 28, 4, 3, 29, 10, 22, 17, 15, 24, 8, 31, 1, 6, 26, 13, 19, 20, 12, 27, 5, 2, 30, 9, 23, 16, 16, 23, 9, 30, 2, 5, 27, 12, 20, 19, 13, 26, 6, 1, 31, 8, 24, 15, 17, 22, 10, 29, 3, 4, 28, 11, 21, 18, 14, 25, 7, 32, 0, // Mode -3 Offset 576 + 13, 19, 26, 6, 7, 25, 20, 12, 1, 31, 14, 18, 27, 5, 8, 24, 21, 11, 2, 30, 15, 17, 28, 4, 9, 23, 22, 10, 3, 29, 16, 16, 29, 3, 10, 22, 23, 9, 4, 28, 17, 15, 30, 2, 11, 21, 24, 8, 5, 27, 18, 14, 31, 1, 12, 20, 25, 7, 6, 26, 19, 13, 32, 0, // Mode -2 Offset 640 + 19, 13, 6, 26, 25, 7, 12, 20, 31, 1, 18, 14, 5, 27, 24, 8, 11, 21, 30, 2, 17, 15, 4, 28, 23, 9, 10, 22, 29, 3, 16, 16, 3, 29, 22, 10, 9, 23, 28, 4, 15, 17, 2, 30, 21, 11, 8, 24, 27, 5, 14, 18, 1, 31, 20, 12, 7, 25, 26, 6, 13, 19, 32, 0, // Mode -1 Offset 704 + 25, 7, 18, 14, 11, 21, 4, 28, 29, 3, 22, 10, 15, 17, 8, 24, 1, 31, 26, 6, 19, 13, 12, 20, 5, 27, 30, 2, 23, 9, 16, 16, 9, 23, 2, 30, 27, 5, 20, 12, 13, 19, 6, 26, 31, 1, 24, 8, 17, 15, 10, 22, 3, 29, 28, 4, 21, 11, 14, 18, 7, 25, 32, 0, // Mode 0 Offset 768 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, 13, 19, 10, 22, 7, 25, 4, 28, 1, 31, 30, 2, 27, 5, 24, 8, 21, 11, 18, 14, 15, 17, 12, 20, 9, 23, 6, 26, 3, 29, 32, 0, // Mode 1 Offset 832 +}; + +// Chroma linear interpolation filter weights for width 4, horizontal modes +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w4_hor[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 2 + 3, 29, 6, 26, 9, 23, 12, 20, 3, 29, 6, 26, 9, 23, 12, 20, // Mode 3 + 6, 26, 12, 20, 18, 14, 24, 8, 6, 26, 12, 20, 18, 14, 24, 8, // Mode 4 + 9, 23, 18, 14, 27, 5, 4, 28, 9, 23, 18, 14, 27, 5, 4, 28, // Mode 5 + 12, 20, 24, 8, 4, 28, 16, 16, 12, 20, 24, 8, 4, 28, 16, 16, // Mode 6 + 14, 18, 28, 4, 10, 22, 24, 8, 14, 18, 28, 4, 10, 22, 24, 8, // Mode 7 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 8 + 18, 14, 4, 28, 22, 10, 8, 24, 18, 14, 4, 28, 22, 10, 8, 24, // Mode 9 + 20, 12, 8, 24, 28, 4, 16, 16, 20, 12, 8, 24, 28, 4, 16, 16, // Mode 10 + 22, 10, 12, 20, 2, 30, 24, 8, 22, 10, 12, 20, 2, 30, 24, 8, // Mode 11 + 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, // Mode 12 + 26, 6, 20, 12, 14, 18, 8, 24, 26, 6, 20, 12, 14, 18, 8, 24, // Mode 13 + 28, 4, 24, 8, 20, 12, 16, 16, 28, 4, 24, 8, 20, 12, 16, 16, // Mode 14 + 29, 3, 26, 6, 23, 9, 20, 12, 29, 3, 26, 6, 23, 9, 20, 12, // Mode 15 + 30, 2, 28, 4, 26, 6, 24, 8, 30, 2, 28, 4, 26, 6, 24, 8, // Mode 16 + 31, 1, 30, 2, 29, 3, 28, 4, 31, 1, 30, 2, 29, 3, 28, 4, // Mode 17 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 18 + 1, 31, 2, 30, 3, 29, 4, 28, 1, 31, 2, 30, 3, 29, 4, 28, // Mode 19 + 2, 30, 4, 28, 6, 26, 8, 24, 2, 30, 4, 28, 6, 26, 8, 24, // Mode 20 + 3, 29, 6, 26, 9, 23, 12, 20, 3, 29, 6, 26, 9, 23, 12, 20, // Mode 21 + 4, 28, 8, 24, 12, 20, 16, 16, 4, 28, 8, 24, 12, 20, 16, 16, // Mode 22 + 6, 26, 12, 20, 18, 14, 24, 8, 6, 26, 12, 20, 18, 14, 24, 8, // Mode 23 + 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, // Mode 24 + 10, 22, 20, 12, 30, 2, 8, 24, 10, 22, 20, 12, 30, 2, 8, 24, // Mode 25 + 12, 20, 24, 8, 4, 28, 16, 16, 12, 20, 24, 8, 4, 28, 16, 16, // Mode 26 + 14, 18, 28, 4, 10, 22, 24, 8, 14, 18, 28, 4, 10, 22, 24, 8, // Mode 27 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // 
Mode 28 + 18, 14, 4, 28, 22, 10, 8, 24, 18, 14, 4, 28, 22, 10, 8, 24, // Mode 29 + 20, 12, 8, 24, 28, 4, 16, 16, 20, 12, 8, 24, 28, 4, 16, 16, // Mode 30 + 23, 9, 14, 18, 5, 27, 28, 4, 23, 9, 14, 18, 5, 27, 28, 4, // Mode 31 + 26, 6, 20, 12, 14, 18, 8, 24, 26, 6, 20, 12, 14, 18, 8, 24, // Mode 32 + 29, 3, 26, 6, 23, 9, 20, 12, 29, 3, 26, 6, 23, 9, 20, 12, // Mode 33 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 34 +}; + + +// Chroma linear interpolation filter weights for width 8, horizontal modes +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w8_hor[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 2 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, // Mode 3 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, // Mode 4 + 9, 23, 18, 14, 27, 5, 4, 28, 13, 19, 22, 10, 31, 1, 8, 24, // Mode 5 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 6 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, // Mode 7 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 8 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, // Mode 9 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 10 + 22, 10, 12, 20, 2, 30, 24, 8, 14, 18, 4, 28, 26, 6, 16, 16, // Mode 11 + 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, // Mode 12 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, // Mode 13 + 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, // Mode 14 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, // Mode 15 + 30, 2, 28, 4, 26, 6, 24, 8, 22, 10, 20, 12, 18, 14, 16, 16, // Mode 16 + 31, 1, 30, 2, 29, 3, 28, 4, 27, 5, 26, 6, 25, 7, 24, 8, // Mode 17 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 18 + 1, 31, 2, 30, 3, 29, 4, 28, 5, 27, 6, 26, 7, 25, 8, 24, // Mode 19 + 2, 30, 4, 28, 6, 26, 8, 24, 10, 22, 12, 20, 14, 18, 16, 16, // Mode 20 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, // Mode 21 + 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, // Mode 22 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, // Mode 23 + 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, // Mode 24 + 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, // Mode 25 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 26 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, // Mode 27 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 28 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, // Mode 29 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 30 + 23, 9, 14, 18, 5, 27, 28, 4, 19, 13, 10, 22, 1, 31, 24, 8, // Mode 31 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, // Mode 32 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, // Mode 33 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 34 +}; + + +// Chroma linear interpolation filter weights for width 16, horizontal modes. 
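+// Each weight pair (w0, w1) in these tables sums to 32: w0 = 32 - frac and
+// w1 = frac, where frac is the fractional part of the projected reference
+// sample position. A scalar sketch of how a single pair is applied
+// (illustrative only, hypothetical helper; the vectorized kernels consume
+// whole rows of pairs at once):
+//
+//   static inline uvg_pixel chroma_linear_interp(
+//     const uvg_pixel* ref, int idx, int8_t w0, int8_t w1)
+//   {
+//     // Two-tap filter between adjacent reference samples, with rounding.
+//     return (uvg_pixel)((w0 * ref[idx] + w1 * ref[idx + 1] + 16) >> 5);
+//   }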
+static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w16_hor[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 2 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, 27, 5, 30, 2, 1, 31, 4, 28, 7, 25, 10, 22, 13, 19, 16, 16, // Mode 3 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, // Mode 4 + 9, 23, 18, 14, 27, 5, 4, 28, 13, 19, 22, 10, 31, 1, 8, 24, 17, 15, 26, 6, 3, 29, 12, 20, 21, 11, 30, 2, 7, 25, 16, 16, // Mode 5 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 6 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, // Mode 7 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 8 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, // Mode 9 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 10 + 22, 10, 12, 20, 2, 30, 24, 8, 14, 18, 4, 28, 26, 6, 16, 16, 6, 26, 28, 4, 18, 14, 8, 24, 30, 2, 20, 12, 10, 22, 32, 0, // Mode 11 + 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, // Mode 12 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode 13 + 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, // Mode 14 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, // Mode 15 + 30, 2, 28, 4, 26, 6, 24, 8, 22, 10, 20, 12, 18, 14, 16, 16, 14, 18, 12, 20, 10, 22, 8, 24, 6, 26, 4, 28, 2, 30, 32, 0, // Mode 16 + 31, 1, 30, 2, 29, 3, 28, 4, 27, 5, 26, 6, 25, 7, 24, 8, 23, 9, 22, 10, 21, 11, 20, 12, 19, 13, 18, 14, 17, 15, 16, 16, // Mode 17 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 18 + 1, 31, 2, 30, 3, 29, 4, 28, 5, 27, 6, 26, 7, 25, 8, 24, 9, 23, 10, 22, 11, 21, 12, 20, 13, 19, 14, 18, 15, 17, 16, 16, // Mode 19 + 2, 30, 4, 28, 6, 26, 8, 24, 10, 22, 12, 20, 14, 18, 16, 16, 18, 14, 20, 12, 22, 10, 24, 8, 26, 6, 28, 4, 30, 2, 32, 0, // Mode 20 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, 27, 5, 30, 2, 1, 31, 4, 28, 7, 25, 10, 22, 13, 19, 16, 16, // Mode 21 + 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, // Mode 22 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, // Mode 23 + 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, // Mode 24 + 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, // Mode 25 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 26 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, // Mode 27 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 
16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 28 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, // Mode 29 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 30 + 23, 9, 14, 18, 5, 27, 28, 4, 19, 13, 10, 22, 1, 31, 24, 8, 15, 17, 6, 26, 29, 3, 20, 12, 11, 21, 2, 30, 25, 7, 16, 16, // Mode 31 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode 32 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, // Mode 33 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 34 +}; + + +// Chroma linear interpolation filter weights for width 32, horizontal modes. +static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w32_hor[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 2 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, 27, 5, 30, 2, 1, 31, 4, 28, 7, 25, 10, 22, 13, 19, 16, 16, 19, 13, 22, 10, 25, 7, 28, 4, 31, 1, 2, 30, 5, 27, 8, 24, 11, 21, 14, 18, 17, 15, 20, 12, 23, 9, 26, 6, 29, 3, 32, 0, // Mode 3 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, // Mode 4 + 9, 23, 18, 14, 27, 5, 4, 28, 13, 19, 22, 10, 31, 1, 8, 24, 17, 15, 26, 6, 3, 29, 12, 20, 21, 11, 30, 2, 7, 25, 16, 16, 25, 7, 2, 30, 11, 21, 20, 12, 29, 3, 6, 26, 15, 17, 24, 8, 1, 31, 10, 22, 19, 13, 28, 4, 5, 27, 14, 18, 23, 9, 32, 0, // Mode 5 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 6 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, // Mode 7 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 8 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, // Mode 9 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 10 + 22, 10, 12, 20, 2, 30, 24, 8, 14, 18, 4, 28, 26, 6, 16, 16, 6, 26, 28, 4, 18, 14, 8, 24, 30, 2, 20, 12, 10, 22, 32, 0, 22, 10, 12, 20, 2, 30, 24, 8, 14, 18, 4, 28, 26, 6, 16, 16, 6, 26, 28, 4, 18, 14, 8, 24, 30, 2, 20, 12, 10, 22, 32, 0, // Mode 11 + 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 
16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, 24, 8, 16, 16, 8, 24, 32, 0, // Mode 12 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode 13 + 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, 28, 4, 24, 8, 20, 12, 16, 16, 12, 20, 8, 24, 4, 28, 32, 0, // Mode 14 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, 13, 19, 10, 22, 7, 25, 4, 28, 1, 31, 30, 2, 27, 5, 24, 8, 21, 11, 18, 14, 15, 17, 12, 20, 9, 23, 6, 26, 3, 29, 32, 0, // Mode 15 + 30, 2, 28, 4, 26, 6, 24, 8, 22, 10, 20, 12, 18, 14, 16, 16, 14, 18, 12, 20, 10, 22, 8, 24, 6, 26, 4, 28, 2, 30, 32, 0, 30, 2, 28, 4, 26, 6, 24, 8, 22, 10, 20, 12, 18, 14, 16, 16, 14, 18, 12, 20, 10, 22, 8, 24, 6, 26, 4, 28, 2, 30, 32, 0, // Mode 16 + 31, 1, 30, 2, 29, 3, 28, 4, 27, 5, 26, 6, 25, 7, 24, 8, 23, 9, 22, 10, 21, 11, 20, 12, 19, 13, 18, 14, 17, 15, 16, 16, 15, 17, 14, 18, 13, 19, 12, 20, 11, 21, 10, 22, 9, 23, 8, 24, 7, 25, 6, 26, 5, 27, 4, 28, 3, 29, 2, 30, 1, 31, 32, 0, // Mode 17 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 18 + 1, 31, 2, 30, 3, 29, 4, 28, 5, 27, 6, 26, 7, 25, 8, 24, 9, 23, 10, 22, 11, 21, 12, 20, 13, 19, 14, 18, 15, 17, 16, 16, 17, 15, 18, 14, 19, 13, 20, 12, 21, 11, 22, 10, 23, 9, 24, 8, 25, 7, 26, 6, 27, 5, 28, 4, 29, 3, 30, 2, 31, 1, 32, 0, // Mode 19 + 2, 30, 4, 28, 6, 26, 8, 24, 10, 22, 12, 20, 14, 18, 16, 16, 18, 14, 20, 12, 22, 10, 24, 8, 26, 6, 28, 4, 30, 2, 32, 0, 2, 30, 4, 28, 6, 26, 8, 24, 10, 22, 12, 20, 14, 18, 16, 16, 18, 14, 20, 12, 22, 10, 24, 8, 26, 6, 28, 4, 30, 2, 32, 0, // Mode 20 + 3, 29, 6, 26, 9, 23, 12, 20, 15, 17, 18, 14, 21, 11, 24, 8, 27, 5, 30, 2, 1, 31, 4, 28, 7, 25, 10, 22, 13, 19, 16, 16, 19, 13, 22, 10, 25, 7, 28, 4, 31, 1, 2, 30, 5, 27, 8, 24, 11, 21, 14, 18, 17, 15, 20, 12, 23, 9, 26, 6, 29, 3, 32, 0, // Mode 21 + 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, 4, 28, 8, 24, 12, 20, 16, 16, 20, 12, 24, 8, 28, 4, 32, 0, // Mode 22 + 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, 6, 26, 12, 20, 18, 14, 24, 8, 30, 2, 4, 28, 10, 22, 16, 16, 22, 10, 28, 4, 2, 30, 8, 24, 14, 18, 20, 12, 26, 6, 32, 0, // Mode 23 + 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, 8, 24, 16, 16, 24, 8, 32, 0, // Mode 24 + 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, 10, 22, 20, 12, 30, 2, 8, 24, 18, 14, 28, 4, 6, 26, 16, 16, 26, 6, 4, 28, 14, 18, 24, 8, 2, 30, 12, 20, 22, 10, 32, 0, // Mode 25 + 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 
0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, 12, 20, 24, 8, 4, 28, 16, 16, 28, 4, 8, 24, 20, 12, 32, 0, // Mode 26 + 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, 14, 18, 28, 4, 10, 22, 24, 8, 6, 26, 20, 12, 2, 30, 16, 16, 30, 2, 12, 20, 26, 6, 8, 24, 22, 10, 4, 28, 18, 14, 32, 0, // Mode 27 + 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, 16, 16, 32, 0, // Mode 28 + 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, 18, 14, 4, 28, 22, 10, 8, 24, 26, 6, 12, 20, 30, 2, 16, 16, 2, 30, 20, 12, 6, 26, 24, 8, 10, 22, 28, 4, 14, 18, 32, 0, // Mode 29 + 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, 20, 12, 8, 24, 28, 4, 16, 16, 4, 28, 24, 8, 12, 20, 32, 0, // Mode 30 + 23, 9, 14, 18, 5, 27, 28, 4, 19, 13, 10, 22, 1, 31, 24, 8, 15, 17, 6, 26, 29, 3, 20, 12, 11, 21, 2, 30, 25, 7, 16, 16, 7, 25, 30, 2, 21, 11, 12, 20, 3, 29, 26, 6, 17, 15, 8, 24, 31, 1, 22, 10, 13, 19, 4, 28, 27, 5, 18, 14, 9, 23, 32, 0, // Mode 31 + 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, 26, 6, 20, 12, 14, 18, 8, 24, 2, 30, 28, 4, 22, 10, 16, 16, 10, 22, 4, 28, 30, 2, 24, 8, 18, 14, 12, 20, 6, 26, 32, 0, // Mode 32 + 29, 3, 26, 6, 23, 9, 20, 12, 17, 15, 14, 18, 11, 21, 8, 24, 5, 27, 2, 30, 31, 1, 28, 4, 25, 7, 22, 10, 19, 13, 16, 16, 13, 19, 10, 22, 7, 25, 4, 28, 1, 31, 30, 2, 27, 5, 24, 8, 21, 11, 18, 14, 15, 17, 12, 20, 9, 23, 6, 26, 3, 29, 32, 0, // Mode 33 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 34 +}; + + +// Chroma linear interpolation filter weights for width 4, vertical modes. 
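+// Layout note: each 16-byte line holds the weight pairs for two rows of a
+// width-4 block (vertical modes use one fractional offset per row, so the
+// same (32 - frac, frac) pair is broadcast four times per row). Modes whose
+// fractional pattern repeats store only one cycle, which is why the per-mode
+// line counts differ; the "Offset N" comments give the byte offset of each
+// mode's first line. Illustrative lookup (hypothetical offset variable):
+//
+//   const int8_t* w = &intra_chroma_linear_interpolation_weights_w4_ver[mode_offset];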
+static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w4_ver[4112] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 2 Offset 0 + 3, 29, 3, 29, 3, 29, 3, 29, 6, 26, 6, 26, 6, 26, 6, 26, // Mode 3 Offset 16 + 9, 23, 9, 23, 9, 23, 9, 23, 12, 20, 12, 20, 12, 20, 12, 20, + 15, 17, 15, 17, 15, 17, 15, 17, 18, 14, 18, 14, 18, 14, 18, 14, + 21, 11, 21, 11, 21, 11, 21, 11, 24, 8, 24, 8, 24, 8, 24, 8, + 27, 5, 27, 5, 27, 5, 27, 5, 30, 2, 30, 2, 30, 2, 30, 2, + 1, 31, 1, 31, 1, 31, 1, 31, 4, 28, 4, 28, 4, 28, 4, 28, + 7, 25, 7, 25, 7, 25, 7, 25, 10, 22, 10, 22, 10, 22, 10, 22, + 13, 19, 13, 19, 13, 19, 13, 19, 16, 16, 16, 16, 16, 16, 16, 16, + 19, 13, 19, 13, 19, 13, 19, 13, 22, 10, 22, 10, 22, 10, 22, 10, + 25, 7, 25, 7, 25, 7, 25, 7, 28, 4, 28, 4, 28, 4, 28, 4, + 31, 1, 31, 1, 31, 1, 31, 1, 2, 30, 2, 30, 2, 30, 2, 30, + 5, 27, 5, 27, 5, 27, 5, 27, 8, 24, 8, 24, 8, 24, 8, 24, + 11, 21, 11, 21, 11, 21, 11, 21, 14, 18, 14, 18, 14, 18, 14, 18, + 17, 15, 17, 15, 17, 15, 17, 15, 20, 12, 20, 12, 20, 12, 20, 12, + 23, 9, 23, 9, 23, 9, 23, 9, 26, 6, 26, 6, 26, 6, 26, 6, + 29, 3, 29, 3, 29, 3, 29, 3, 32, 0, 32, 0, 32, 0, 32, 0, + 6, 26, 6, 26, 6, 26, 6, 26, 12, 20, 12, 20, 12, 20, 12, 20, // Mode 4 Offset 272 + 18, 14, 18, 14, 18, 14, 18, 14, 24, 8, 24, 8, 24, 8, 24, 8, + 30, 2, 30, 2, 30, 2, 30, 2, 4, 28, 4, 28, 4, 28, 4, 28, + 10, 22, 10, 22, 10, 22, 10, 22, 16, 16, 16, 16, 16, 16, 16, 16, + 22, 10, 22, 10, 22, 10, 22, 10, 28, 4, 28, 4, 28, 4, 28, 4, + 2, 30, 2, 30, 2, 30, 2, 30, 8, 24, 8, 24, 8, 24, 8, 24, + 14, 18, 14, 18, 14, 18, 14, 18, 20, 12, 20, 12, 20, 12, 20, 12, + 26, 6, 26, 6, 26, 6, 26, 6, 32, 0, 32, 0, 32, 0, 32, 0, + 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, // Mode 5 Offset 400 + 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28, + 13, 19, 13, 19, 13, 19, 13, 19, 22, 10, 22, 10, 22, 10, 22, 10, + 31, 1, 31, 1, 31, 1, 31, 1, 8, 24, 8, 24, 8, 24, 8, 24, + 17, 15, 17, 15, 17, 15, 17, 15, 26, 6, 26, 6, 26, 6, 26, 6, + 3, 29, 3, 29, 3, 29, 3, 29, 12, 20, 12, 20, 12, 20, 12, 20, + 21, 11, 21, 11, 21, 11, 21, 11, 30, 2, 30, 2, 30, 2, 30, 2, + 7, 25, 7, 25, 7, 25, 7, 25, 16, 16, 16, 16, 16, 16, 16, 16, + 25, 7, 25, 7, 25, 7, 25, 7, 2, 30, 2, 30, 2, 30, 2, 30, + 11, 21, 11, 21, 11, 21, 11, 21, 20, 12, 20, 12, 20, 12, 20, 12, + 29, 3, 29, 3, 29, 3, 29, 3, 6, 26, 6, 26, 6, 26, 6, 26, + 15, 17, 15, 17, 15, 17, 15, 17, 24, 8, 24, 8, 24, 8, 24, 8, + 1, 31, 1, 31, 1, 31, 1, 31, 10, 22, 10, 22, 10, 22, 10, 22, + 19, 13, 19, 13, 19, 13, 19, 13, 28, 4, 28, 4, 28, 4, 28, 4, + 5, 27, 5, 27, 5, 27, 5, 27, 14, 18, 14, 18, 14, 18, 14, 18, + 23, 9, 23, 9, 23, 9, 23, 9, 32, 0, 32, 0, 32, 0, 32, 0, + 12, 20, 12, 20, 12, 20, 12, 20, 24, 8, 24, 8, 24, 8, 24, 8, // Mode 6 Offset 656 + 4, 28, 4, 28, 4, 28, 4, 28, 16, 16, 16, 16, 16, 16, 16, 16, + 28, 4, 28, 4, 28, 4, 28, 4, 8, 24, 8, 24, 8, 24, 8, 24, + 20, 12, 20, 12, 20, 12, 20, 12, 32, 0, 32, 0, 32, 0, 32, 0, + 14, 18, 14, 18, 14, 18, 14, 18, 28, 4, 28, 4, 28, 4, 28, 4, // Mode 7 Offset 720 + 10, 22, 10, 22, 10, 22, 10, 22, 24, 8, 24, 8, 24, 8, 24, 8, + 6, 26, 6, 26, 6, 26, 6, 26, 20, 12, 20, 12, 20, 12, 20, 12, + 2, 30, 2, 30, 2, 30, 2, 30, 16, 16, 16, 16, 16, 16, 16, 16, + 30, 2, 30, 2, 30, 2, 30, 2, 12, 20, 12, 20, 12, 20, 12, 20, + 26, 6, 26, 6, 26, 6, 26, 6, 8, 24, 8, 24, 8, 24, 8, 24, + 22, 10, 22, 10, 22, 10, 22, 10, 4, 28, 4, 28, 4, 28, 4, 28, + 18, 14, 18, 14, 18, 14, 18, 14, 32, 0, 32, 0, 32, 0, 32, 0, + 16, 16, 16, 16, 16, 16, 16, 16, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 8 Offset 848 + 18, 14, 
18, 14, 18, 14, 18, 14, 4, 28, 4, 28, 4, 28, 4, 28, // Mode 9 Offset 864 + 22, 10, 22, 10, 22, 10, 22, 10, 8, 24, 8, 24, 8, 24, 8, 24, + 26, 6, 26, 6, 26, 6, 26, 6, 12, 20, 12, 20, 12, 20, 12, 20, + 30, 2, 30, 2, 30, 2, 30, 2, 16, 16, 16, 16, 16, 16, 16, 16, + 2, 30, 2, 30, 2, 30, 2, 30, 20, 12, 20, 12, 20, 12, 20, 12, + 6, 26, 6, 26, 6, 26, 6, 26, 24, 8, 24, 8, 24, 8, 24, 8, + 10, 22, 10, 22, 10, 22, 10, 22, 28, 4, 28, 4, 28, 4, 28, 4, + 14, 18, 14, 18, 14, 18, 14, 18, 32, 0, 32, 0, 32, 0, 32, 0, + 20, 12, 20, 12, 20, 12, 20, 12, 8, 24, 8, 24, 8, 24, 8, 24, // Mode 10 Offset 992 + 28, 4, 28, 4, 28, 4, 28, 4, 16, 16, 16, 16, 16, 16, 16, 16, + 4, 28, 4, 28, 4, 28, 4, 28, 24, 8, 24, 8, 24, 8, 24, 8, + 12, 20, 12, 20, 12, 20, 12, 20, 32, 0, 32, 0, 32, 0, 32, 0, + 22, 10, 22, 10, 22, 10, 22, 10, 12, 20, 12, 20, 12, 20, 12, 20, // Mode 11 Offset 1056 + 2, 30, 2, 30, 2, 30, 2, 30, 24, 8, 24, 8, 24, 8, 24, 8, + 14, 18, 14, 18, 14, 18, 14, 18, 4, 28, 4, 28, 4, 28, 4, 28, + 26, 6, 26, 6, 26, 6, 26, 6, 16, 16, 16, 16, 16, 16, 16, 16, + 6, 26, 6, 26, 6, 26, 6, 26, 28, 4, 28, 4, 28, 4, 28, 4, + 18, 14, 18, 14, 18, 14, 18, 14, 8, 24, 8, 24, 8, 24, 8, 24, + 30, 2, 30, 2, 30, 2, 30, 2, 20, 12, 20, 12, 20, 12, 20, 12, + 10, 22, 10, 22, 10, 22, 10, 22, 32, 0, 32, 0, 32, 0, 32, 0, + 24, 8, 24, 8, 24, 8, 24, 8, 16, 16, 16, 16, 16, 16, 16, 16, // Mode 12 Offset 1184 + 8, 24, 8, 24, 8, 24, 8, 24, 32, 0, 32, 0, 32, 0, 32, 0, + 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, // Mode 13 Offset 1216 + 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24, + 2, 30, 2, 30, 2, 30, 2, 30, 28, 4, 28, 4, 28, 4, 28, 4, + 22, 10, 22, 10, 22, 10, 22, 10, 16, 16, 16, 16, 16, 16, 16, 16, + 10, 22, 10, 22, 10, 22, 10, 22, 4, 28, 4, 28, 4, 28, 4, 28, + 30, 2, 30, 2, 30, 2, 30, 2, 24, 8, 24, 8, 24, 8, 24, 8, + 18, 14, 18, 14, 18, 14, 18, 14, 12, 20, 12, 20, 12, 20, 12, 20, + 6, 26, 6, 26, 6, 26, 6, 26, 32, 0, 32, 0, 32, 0, 32, 0, + 28, 4, 28, 4, 28, 4, 28, 4, 24, 8, 24, 8, 24, 8, 24, 8, // Mode 14 Offset 1344 + 20, 12, 20, 12, 20, 12, 20, 12, 16, 16, 16, 16, 16, 16, 16, 16, + 12, 20, 12, 20, 12, 20, 12, 20, 8, 24, 8, 24, 8, 24, 8, 24, + 4, 28, 4, 28, 4, 28, 4, 28, 32, 0, 32, 0, 32, 0, 32, 0, + 29, 3, 29, 3, 29, 3, 29, 3, 26, 6, 26, 6, 26, 6, 26, 6, // Mode 15 Offset 1408 + 23, 9, 23, 9, 23, 9, 23, 9, 20, 12, 20, 12, 20, 12, 20, 12, + 17, 15, 17, 15, 17, 15, 17, 15, 14, 18, 14, 18, 14, 18, 14, 18, + 11, 21, 11, 21, 11, 21, 11, 21, 8, 24, 8, 24, 8, 24, 8, 24, + 5, 27, 5, 27, 5, 27, 5, 27, 2, 30, 2, 30, 2, 30, 2, 30, + 31, 1, 31, 1, 31, 1, 31, 1, 28, 4, 28, 4, 28, 4, 28, 4, + 25, 7, 25, 7, 25, 7, 25, 7, 22, 10, 22, 10, 22, 10, 22, 10, + 19, 13, 19, 13, 19, 13, 19, 13, 16, 16, 16, 16, 16, 16, 16, 16, + 13, 19, 13, 19, 13, 19, 13, 19, 10, 22, 10, 22, 10, 22, 10, 22, + 7, 25, 7, 25, 7, 25, 7, 25, 4, 28, 4, 28, 4, 28, 4, 28, + 1, 31, 1, 31, 1, 31, 1, 31, 30, 2, 30, 2, 30, 2, 30, 2, + 27, 5, 27, 5, 27, 5, 27, 5, 24, 8, 24, 8, 24, 8, 24, 8, + 21, 11, 21, 11, 21, 11, 21, 11, 18, 14, 18, 14, 18, 14, 18, 14, + 15, 17, 15, 17, 15, 17, 15, 17, 12, 20, 12, 20, 12, 20, 12, 20, + 9, 23, 9, 23, 9, 23, 9, 23, 6, 26, 6, 26, 6, 26, 6, 26, + 3, 29, 3, 29, 3, 29, 3, 29, 32, 0, 32, 0, 32, 0, 32, 0, + 30, 2, 30, 2, 30, 2, 30, 2, 28, 4, 28, 4, 28, 4, 28, 4, // Mode 16 Offset 1664 + 26, 6, 26, 6, 26, 6, 26, 6, 24, 8, 24, 8, 24, 8, 24, 8, + 22, 10, 22, 10, 22, 10, 22, 10, 20, 12, 20, 12, 20, 12, 20, 12, + 18, 14, 18, 14, 18, 14, 18, 14, 16, 16, 16, 16, 16, 16, 16, 16, + 14, 18, 14, 18, 14, 18, 14, 18, 12, 20, 12, 20, 12, 20, 12, 20, + 10, 
22, 10, 22, 10, 22, 10, 22, 8, 24, 8, 24, 8, 24, 8, 24, + 6, 26, 6, 26, 6, 26, 6, 26, 4, 28, 4, 28, 4, 28, 4, 28, + 2, 30, 2, 30, 2, 30, 2, 30, 32, 0, 32, 0, 32, 0, 32, 0, + 31, 1, 31, 1, 31, 1, 31, 1, 30, 2, 30, 2, 30, 2, 30, 2, // Mode 17 Offset 1792 + 29, 3, 29, 3, 29, 3, 29, 3, 28, 4, 28, 4, 28, 4, 28, 4, + 27, 5, 27, 5, 27, 5, 27, 5, 26, 6, 26, 6, 26, 6, 26, 6, + 25, 7, 25, 7, 25, 7, 25, 7, 24, 8, 24, 8, 24, 8, 24, 8, + 23, 9, 23, 9, 23, 9, 23, 9, 22, 10, 22, 10, 22, 10, 22, 10, + 21, 11, 21, 11, 21, 11, 21, 11, 20, 12, 20, 12, 20, 12, 20, 12, + 19, 13, 19, 13, 19, 13, 19, 13, 18, 14, 18, 14, 18, 14, 18, 14, + 17, 15, 17, 15, 17, 15, 17, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 15, 17, 15, 17, 15, 17, 15, 17, 14, 18, 14, 18, 14, 18, 14, 18, + 13, 19, 13, 19, 13, 19, 13, 19, 12, 20, 12, 20, 12, 20, 12, 20, + 11, 21, 11, 21, 11, 21, 11, 21, 10, 22, 10, 22, 10, 22, 10, 22, + 9, 23, 9, 23, 9, 23, 9, 23, 8, 24, 8, 24, 8, 24, 8, 24, + 7, 25, 7, 25, 7, 25, 7, 25, 6, 26, 6, 26, 6, 26, 6, 26, + 5, 27, 5, 27, 5, 27, 5, 27, 4, 28, 4, 28, 4, 28, 4, 28, + 3, 29, 3, 29, 3, 29, 3, 29, 2, 30, 2, 30, 2, 30, 2, 30, + 1, 31, 1, 31, 1, 31, 1, 31, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 18 Offset 2048 + 1, 31, 1, 31, 1, 31, 1, 31, 2, 30, 2, 30, 2, 30, 2, 30, // Mode 19 Offset 2064 + 3, 29, 3, 29, 3, 29, 3, 29, 4, 28, 4, 28, 4, 28, 4, 28, + 5, 27, 5, 27, 5, 27, 5, 27, 6, 26, 6, 26, 6, 26, 6, 26, + 7, 25, 7, 25, 7, 25, 7, 25, 8, 24, 8, 24, 8, 24, 8, 24, + 9, 23, 9, 23, 9, 23, 9, 23, 10, 22, 10, 22, 10, 22, 10, 22, + 11, 21, 11, 21, 11, 21, 11, 21, 12, 20, 12, 20, 12, 20, 12, 20, + 13, 19, 13, 19, 13, 19, 13, 19, 14, 18, 14, 18, 14, 18, 14, 18, + 15, 17, 15, 17, 15, 17, 15, 17, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 15, 17, 15, 17, 15, 17, 15, 18, 14, 18, 14, 18, 14, 18, 14, + 19, 13, 19, 13, 19, 13, 19, 13, 20, 12, 20, 12, 20, 12, 20, 12, + 21, 11, 21, 11, 21, 11, 21, 11, 22, 10, 22, 10, 22, 10, 22, 10, + 23, 9, 23, 9, 23, 9, 23, 9, 24, 8, 24, 8, 24, 8, 24, 8, + 25, 7, 25, 7, 25, 7, 25, 7, 26, 6, 26, 6, 26, 6, 26, 6, + 27, 5, 27, 5, 27, 5, 27, 5, 28, 4, 28, 4, 28, 4, 28, 4, + 29, 3, 29, 3, 29, 3, 29, 3, 30, 2, 30, 2, 30, 2, 30, 2, + 31, 1, 31, 1, 31, 1, 31, 1, 32, 0, 32, 0, 32, 0, 32, 0, + 2, 30, 2, 30, 2, 30, 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, // Mode 20 Offset 2320 + 6, 26, 6, 26, 6, 26, 6, 26, 8, 24, 8, 24, 8, 24, 8, 24, + 10, 22, 10, 22, 10, 22, 10, 22, 12, 20, 12, 20, 12, 20, 12, 20, + 14, 18, 14, 18, 14, 18, 14, 18, 16, 16, 16, 16, 16, 16, 16, 16, + 18, 14, 18, 14, 18, 14, 18, 14, 20, 12, 20, 12, 20, 12, 20, 12, + 22, 10, 22, 10, 22, 10, 22, 10, 24, 8, 24, 8, 24, 8, 24, 8, + 26, 6, 26, 6, 26, 6, 26, 6, 28, 4, 28, 4, 28, 4, 28, 4, + 30, 2, 30, 2, 30, 2, 30, 2, 32, 0, 32, 0, 32, 0, 32, 0, + 3, 29, 3, 29, 3, 29, 3, 29, 6, 26, 6, 26, 6, 26, 6, 26, // Mode 21 Offset 2448 + 9, 23, 9, 23, 9, 23, 9, 23, 12, 20, 12, 20, 12, 20, 12, 20, + 15, 17, 15, 17, 15, 17, 15, 17, 18, 14, 18, 14, 18, 14, 18, 14, + 21, 11, 21, 11, 21, 11, 21, 11, 24, 8, 24, 8, 24, 8, 24, 8, + 27, 5, 27, 5, 27, 5, 27, 5, 30, 2, 30, 2, 30, 2, 30, 2, + 1, 31, 1, 31, 1, 31, 1, 31, 4, 28, 4, 28, 4, 28, 4, 28, + 7, 25, 7, 25, 7, 25, 7, 25, 10, 22, 10, 22, 10, 22, 10, 22, + 13, 19, 13, 19, 13, 19, 13, 19, 16, 16, 16, 16, 16, 16, 16, 16, + 19, 13, 19, 13, 19, 13, 19, 13, 22, 10, 22, 10, 22, 10, 22, 10, + 25, 7, 25, 7, 25, 7, 25, 7, 28, 4, 28, 4, 28, 4, 28, 4, + 31, 1, 31, 1, 31, 1, 31, 1, 2, 30, 2, 30, 2, 30, 2, 30, + 5, 27, 5, 27, 5, 27, 5, 27, 8, 24, 8, 24, 8, 24, 8, 24, + 11, 21, 11, 21, 11, 21, 
11, 21, 14, 18, 14, 18, 14, 18, 14, 18, + 17, 15, 17, 15, 17, 15, 17, 15, 20, 12, 20, 12, 20, 12, 20, 12, + 23, 9, 23, 9, 23, 9, 23, 9, 26, 6, 26, 6, 26, 6, 26, 6, + 29, 3, 29, 3, 29, 3, 29, 3, 32, 0, 32, 0, 32, 0, 32, 0, + 4, 28, 4, 28, 4, 28, 4, 28, 8, 24, 8, 24, 8, 24, 8, 24, // Mode 22 Offset 2704 + 12, 20, 12, 20, 12, 20, 12, 20, 16, 16, 16, 16, 16, 16, 16, 16, + 20, 12, 20, 12, 20, 12, 20, 12, 24, 8, 24, 8, 24, 8, 24, 8, + 28, 4, 28, 4, 28, 4, 28, 4, 32, 0, 32, 0, 32, 0, 32, 0, + 6, 26, 6, 26, 6, 26, 6, 26, 12, 20, 12, 20, 12, 20, 12, 20, // Mode 23 Offset 2768 + 18, 14, 18, 14, 18, 14, 18, 14, 24, 8, 24, 8, 24, 8, 24, 8, + 30, 2, 30, 2, 30, 2, 30, 2, 4, 28, 4, 28, 4, 28, 4, 28, + 10, 22, 10, 22, 10, 22, 10, 22, 16, 16, 16, 16, 16, 16, 16, 16, + 22, 10, 22, 10, 22, 10, 22, 10, 28, 4, 28, 4, 28, 4, 28, 4, + 2, 30, 2, 30, 2, 30, 2, 30, 8, 24, 8, 24, 8, 24, 8, 24, + 14, 18, 14, 18, 14, 18, 14, 18, 20, 12, 20, 12, 20, 12, 20, 12, + 26, 6, 26, 6, 26, 6, 26, 6, 32, 0, 32, 0, 32, 0, 32, 0, + 8, 24, 8, 24, 8, 24, 8, 24, 16, 16, 16, 16, 16, 16, 16, 16, // Mode 24 Offset 2896 + 24, 8, 24, 8, 24, 8, 24, 8, 32, 0, 32, 0, 32, 0, 32, 0, + 10, 22, 10, 22, 10, 22, 10, 22, 20, 12, 20, 12, 20, 12, 20, 12, // Mode 25 Offset 2928 + 30, 2, 30, 2, 30, 2, 30, 2, 8, 24, 8, 24, 8, 24, 8, 24, + 18, 14, 18, 14, 18, 14, 18, 14, 28, 4, 28, 4, 28, 4, 28, 4, + 6, 26, 6, 26, 6, 26, 6, 26, 16, 16, 16, 16, 16, 16, 16, 16, + 26, 6, 26, 6, 26, 6, 26, 6, 4, 28, 4, 28, 4, 28, 4, 28, + 14, 18, 14, 18, 14, 18, 14, 18, 24, 8, 24, 8, 24, 8, 24, 8, + 2, 30, 2, 30, 2, 30, 2, 30, 12, 20, 12, 20, 12, 20, 12, 20, + 22, 10, 22, 10, 22, 10, 22, 10, 32, 0, 32, 0, 32, 0, 32, 0, + 12, 20, 12, 20, 12, 20, 12, 20, 24, 8, 24, 8, 24, 8, 24, 8, // Mode 26 Offset 3056 + 4, 28, 4, 28, 4, 28, 4, 28, 16, 16, 16, 16, 16, 16, 16, 16, + 28, 4, 28, 4, 28, 4, 28, 4, 8, 24, 8, 24, 8, 24, 8, 24, + 20, 12, 20, 12, 20, 12, 20, 12, 32, 0, 32, 0, 32, 0, 32, 0, + 14, 18, 14, 18, 14, 18, 14, 18, 28, 4, 28, 4, 28, 4, 28, 4, // Mode 27 Offset 3120 + 10, 22, 10, 22, 10, 22, 10, 22, 24, 8, 24, 8, 24, 8, 24, 8, + 6, 26, 6, 26, 6, 26, 6, 26, 20, 12, 20, 12, 20, 12, 20, 12, + 2, 30, 2, 30, 2, 30, 2, 30, 16, 16, 16, 16, 16, 16, 16, 16, + 30, 2, 30, 2, 30, 2, 30, 2, 12, 20, 12, 20, 12, 20, 12, 20, + 26, 6, 26, 6, 26, 6, 26, 6, 8, 24, 8, 24, 8, 24, 8, 24, + 22, 10, 22, 10, 22, 10, 22, 10, 4, 28, 4, 28, 4, 28, 4, 28, + 18, 14, 18, 14, 18, 14, 18, 14, 32, 0, 32, 0, 32, 0, 32, 0, + 16, 16, 16, 16, 16, 16, 16, 16, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 28 Offset 3248 + 18, 14, 18, 14, 18, 14, 18, 14, 4, 28, 4, 28, 4, 28, 4, 28, // Mode 29 Offset 3264 + 22, 10, 22, 10, 22, 10, 22, 10, 8, 24, 8, 24, 8, 24, 8, 24, + 26, 6, 26, 6, 26, 6, 26, 6, 12, 20, 12, 20, 12, 20, 12, 20, + 30, 2, 30, 2, 30, 2, 30, 2, 16, 16, 16, 16, 16, 16, 16, 16, + 2, 30, 2, 30, 2, 30, 2, 30, 20, 12, 20, 12, 20, 12, 20, 12, + 6, 26, 6, 26, 6, 26, 6, 26, 24, 8, 24, 8, 24, 8, 24, 8, + 10, 22, 10, 22, 10, 22, 10, 22, 28, 4, 28, 4, 28, 4, 28, 4, + 14, 18, 14, 18, 14, 18, 14, 18, 32, 0, 32, 0, 32, 0, 32, 0, + 20, 12, 20, 12, 20, 12, 20, 12, 8, 24, 8, 24, 8, 24, 8, 24, // Mode 30 Offset 3392 + 28, 4, 28, 4, 28, 4, 28, 4, 16, 16, 16, 16, 16, 16, 16, 16, + 4, 28, 4, 28, 4, 28, 4, 28, 24, 8, 24, 8, 24, 8, 24, 8, + 12, 20, 12, 20, 12, 20, 12, 20, 32, 0, 32, 0, 32, 0, 32, 0, + 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, // Mode 31 Offset 3456 + 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4, + 19, 13, 19, 13, 19, 13, 19, 13, 10, 22, 10, 22, 10, 22, 10, 22, + 1, 31, 1, 31, 1, 31, 1, 31, 24, 
8, 24, 8, 24, 8, 24, 8, + 15, 17, 15, 17, 15, 17, 15, 17, 6, 26, 6, 26, 6, 26, 6, 26, + 29, 3, 29, 3, 29, 3, 29, 3, 20, 12, 20, 12, 20, 12, 20, 12, + 11, 21, 11, 21, 11, 21, 11, 21, 2, 30, 2, 30, 2, 30, 2, 30, + 25, 7, 25, 7, 25, 7, 25, 7, 16, 16, 16, 16, 16, 16, 16, 16, + 7, 25, 7, 25, 7, 25, 7, 25, 30, 2, 30, 2, 30, 2, 30, 2, + 21, 11, 21, 11, 21, 11, 21, 11, 12, 20, 12, 20, 12, 20, 12, 20, + 3, 29, 3, 29, 3, 29, 3, 29, 26, 6, 26, 6, 26, 6, 26, 6, + 17, 15, 17, 15, 17, 15, 17, 15, 8, 24, 8, 24, 8, 24, 8, 24, + 31, 1, 31, 1, 31, 1, 31, 1, 22, 10, 22, 10, 22, 10, 22, 10, + 13, 19, 13, 19, 13, 19, 13, 19, 4, 28, 4, 28, 4, 28, 4, 28, + 27, 5, 27, 5, 27, 5, 27, 5, 18, 14, 18, 14, 18, 14, 18, 14, + 9, 23, 9, 23, 9, 23, 9, 23, 32, 0, 32, 0, 32, 0, 32, 0, + 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, // Mode 32 Offset 3712 + 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24, + 2, 30, 2, 30, 2, 30, 2, 30, 28, 4, 28, 4, 28, 4, 28, 4, + 22, 10, 22, 10, 22, 10, 22, 10, 16, 16, 16, 16, 16, 16, 16, 16, + 10, 22, 10, 22, 10, 22, 10, 22, 4, 28, 4, 28, 4, 28, 4, 28, + 30, 2, 30, 2, 30, 2, 30, 2, 24, 8, 24, 8, 24, 8, 24, 8, + 18, 14, 18, 14, 18, 14, 18, 14, 12, 20, 12, 20, 12, 20, 12, 20, + 6, 26, 6, 26, 6, 26, 6, 26, 32, 0, 32, 0, 32, 0, 32, 0, + 29, 3, 29, 3, 29, 3, 29, 3, 26, 6, 26, 6, 26, 6, 26, 6, // Mode 33 Offset 3840 + 23, 9, 23, 9, 23, 9, 23, 9, 20, 12, 20, 12, 20, 12, 20, 12, + 17, 15, 17, 15, 17, 15, 17, 15, 14, 18, 14, 18, 14, 18, 14, 18, + 11, 21, 11, 21, 11, 21, 11, 21, 8, 24, 8, 24, 8, 24, 8, 24, + 5, 27, 5, 27, 5, 27, 5, 27, 2, 30, 2, 30, 2, 30, 2, 30, + 31, 1, 31, 1, 31, 1, 31, 1, 28, 4, 28, 4, 28, 4, 28, 4, + 25, 7, 25, 7, 25, 7, 25, 7, 22, 10, 22, 10, 22, 10, 22, 10, + 19, 13, 19, 13, 19, 13, 19, 13, 16, 16, 16, 16, 16, 16, 16, 16, + 13, 19, 13, 19, 13, 19, 13, 19, 10, 22, 10, 22, 10, 22, 10, 22, + 7, 25, 7, 25, 7, 25, 7, 25, 4, 28, 4, 28, 4, 28, 4, 28, + 1, 31, 1, 31, 1, 31, 1, 31, 30, 2, 30, 2, 30, 2, 30, 2, + 27, 5, 27, 5, 27, 5, 27, 5, 24, 8, 24, 8, 24, 8, 24, 8, + 21, 11, 21, 11, 21, 11, 21, 11, 18, 14, 18, 14, 18, 14, 18, 14, + 15, 17, 15, 17, 15, 17, 15, 17, 12, 20, 12, 20, 12, 20, 12, 20, + 9, 23, 9, 23, 9, 23, 9, 23, 6, 26, 6, 26, 6, 26, 6, 26, + 3, 29, 3, 29, 3, 29, 3, 29, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode 34 Offset 4096 +}; + + +// Chroma linear interpolation filter weights for width 4, wide angle vertical modes. 
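+// Same layout and "Offset" indexing as the regular w4 vertical table above,
+// covering the wide angle modes -12..1 that non-square blocks are remapped
+// to. Every pair still sums to 32, which a unit test could verify
+// (illustrative sketch using assert() from <assert.h>, not part of this patch):
+//
+//   for (size_t i = 0; i < sizeof(intra_chroma_linear_interpolation_weights_w4_ver_wide_angle); i += 2) {
+//     assert(intra_chroma_linear_interpolation_weights_w4_ver_wide_angle[i] +
+//            intra_chroma_linear_interpolation_weights_w4_ver_wide_angle[i + 1] == 32);
+//   }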
+static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w4_ver_wide_angle[2368] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -12 Offset 0 + 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10, // Mode -11 Offset 16 + 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20, + 23, 9, 23, 9, 23, 9, 23, 9, 2, 30, 2, 30, 2, 30, 2, 30, + 13, 19, 13, 19, 13, 19, 13, 19, 24, 8, 24, 8, 24, 8, 24, 8, + 3, 29, 3, 29, 3, 29, 3, 29, 14, 18, 14, 18, 14, 18, 14, 18, + 25, 7, 25, 7, 25, 7, 25, 7, 4, 28, 4, 28, 4, 28, 4, 28, + 15, 17, 15, 17, 15, 17, 15, 17, 26, 6, 26, 6, 26, 6, 26, 6, + 5, 27, 5, 27, 5, 27, 5, 27, 16, 16, 16, 16, 16, 16, 16, 16, + 27, 5, 27, 5, 27, 5, 27, 5, 6, 26, 6, 26, 6, 26, 6, 26, + 17, 15, 17, 15, 17, 15, 17, 15, 28, 4, 28, 4, 28, 4, 28, 4, + 7, 25, 7, 25, 7, 25, 7, 25, 18, 14, 18, 14, 18, 14, 18, 14, + 29, 3, 29, 3, 29, 3, 29, 3, 8, 24, 8, 24, 8, 24, 8, 24, + 19, 13, 19, 13, 19, 13, 19, 13, 30, 2, 30, 2, 30, 2, 30, 2, + 9, 23, 9, 23, 9, 23, 9, 23, 20, 12, 20, 12, 20, 12, 20, 12, + 31, 1, 31, 1, 31, 1, 31, 1, 10, 22, 10, 22, 10, 22, 10, 22, + 21, 11, 21, 11, 21, 11, 21, 11, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -10 Offset 272 + 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22, // Mode -9 Offset 288 + 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12, + 9, 23, 9, 23, 9, 23, 9, 23, 30, 2, 30, 2, 30, 2, 30, 2, + 19, 13, 19, 13, 19, 13, 19, 13, 8, 24, 8, 24, 8, 24, 8, 24, + 29, 3, 29, 3, 29, 3, 29, 3, 18, 14, 18, 14, 18, 14, 18, 14, + 7, 25, 7, 25, 7, 25, 7, 25, 28, 4, 28, 4, 28, 4, 28, 4, + 17, 15, 17, 15, 17, 15, 17, 15, 6, 26, 6, 26, 6, 26, 6, 26, + 27, 5, 27, 5, 27, 5, 27, 5, 16, 16, 16, 16, 16, 16, 16, 16, + 5, 27, 5, 27, 5, 27, 5, 27, 26, 6, 26, 6, 26, 6, 26, 6, + 15, 17, 15, 17, 15, 17, 15, 17, 4, 28, 4, 28, 4, 28, 4, 28, + 25, 7, 25, 7, 25, 7, 25, 7, 14, 18, 14, 18, 14, 18, 14, 18, + 3, 29, 3, 29, 3, 29, 3, 29, 24, 8, 24, 8, 24, 8, 24, 8, + 13, 19, 13, 19, 13, 19, 13, 19, 2, 30, 2, 30, 2, 30, 2, 30, + 23, 9, 23, 9, 23, 9, 23, 9, 12, 20, 12, 20, 12, 20, 12, 20, + 1, 31, 1, 31, 1, 31, 1, 31, 22, 10, 22, 10, 22, 10, 22, 10, + 11, 21, 11, 21, 11, 21, 11, 21, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -8 Offset 544 + 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, // Mode -7 Offset 560 + 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24, + 2, 30, 2, 30, 2, 30, 2, 30, 28, 4, 28, 4, 28, 4, 28, 4, + 22, 10, 22, 10, 22, 10, 22, 10, 16, 16, 16, 16, 16, 16, 16, 16, + 10, 22, 10, 22, 10, 22, 10, 22, 4, 28, 4, 28, 4, 28, 4, 28, + 30, 2, 30, 2, 30, 2, 30, 2, 24, 8, 24, 8, 24, 8, 24, 8, + 18, 14, 18, 14, 18, 14, 18, 14, 12, 20, 12, 20, 12, 20, 12, 20, + 6, 26, 6, 26, 6, 26, 6, 26, 32, 0, 32, 0, 32, 0, 32, 0, + 10, 22, 10, 22, 10, 22, 10, 22, 20, 12, 20, 12, 20, 12, 20, 12, // Mode -6 Offset 688 + 30, 2, 30, 2, 30, 2, 30, 2, 8, 24, 8, 24, 8, 24, 8, 24, + 18, 14, 18, 14, 18, 14, 18, 14, 28, 4, 28, 4, 28, 4, 28, 4, + 6, 26, 6, 26, 6, 26, 6, 26, 16, 16, 16, 16, 16, 16, 16, 16, + 26, 6, 26, 6, 26, 6, 26, 6, 4, 28, 4, 28, 4, 28, 4, 28, + 14, 18, 14, 18, 14, 18, 14, 18, 24, 8, 24, 8, 24, 8, 24, 8, + 2, 30, 2, 30, 2, 30, 2, 30, 12, 20, 12, 20, 12, 20, 12, 20, + 22, 10, 22, 10, 22, 10, 22, 10, 32, 0, 32, 0, 32, 0, 32, 0, + 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, // Mode -5 Offset 816 + 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4, + 19, 13, 19, 13, 19, 13, 19, 13, 10, 
22, 10, 22, 10, 22, 10, 22, + 1, 31, 1, 31, 1, 31, 1, 31, 24, 8, 24, 8, 24, 8, 24, 8, + 15, 17, 15, 17, 15, 17, 15, 17, 6, 26, 6, 26, 6, 26, 6, 26, + 29, 3, 29, 3, 29, 3, 29, 3, 20, 12, 20, 12, 20, 12, 20, 12, + 11, 21, 11, 21, 11, 21, 11, 21, 2, 30, 2, 30, 2, 30, 2, 30, + 25, 7, 25, 7, 25, 7, 25, 7, 16, 16, 16, 16, 16, 16, 16, 16, + 7, 25, 7, 25, 7, 25, 7, 25, 30, 2, 30, 2, 30, 2, 30, 2, + 21, 11, 21, 11, 21, 11, 21, 11, 12, 20, 12, 20, 12, 20, 12, 20, + 3, 29, 3, 29, 3, 29, 3, 29, 26, 6, 26, 6, 26, 6, 26, 6, + 17, 15, 17, 15, 17, 15, 17, 15, 8, 24, 8, 24, 8, 24, 8, 24, + 31, 1, 31, 1, 31, 1, 31, 1, 22, 10, 22, 10, 22, 10, 22, 10, + 13, 19, 13, 19, 13, 19, 13, 19, 4, 28, 4, 28, 4, 28, 4, 28, + 27, 5, 27, 5, 27, 5, 27, 5, 18, 14, 18, 14, 18, 14, 18, 14, + 9, 23, 9, 23, 9, 23, 9, 23, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -4 Offset 1072 + 7, 25, 7, 25, 7, 25, 7, 25, 14, 18, 14, 18, 14, 18, 14, 18, // Mode -3 Offset 1088 + 21, 11, 21, 11, 21, 11, 21, 11, 28, 4, 28, 4, 28, 4, 28, 4, + 3, 29, 3, 29, 3, 29, 3, 29, 10, 22, 10, 22, 10, 22, 10, 22, + 17, 15, 17, 15, 17, 15, 17, 15, 24, 8, 24, 8, 24, 8, 24, 8, + 31, 1, 31, 1, 31, 1, 31, 1, 6, 26, 6, 26, 6, 26, 6, 26, + 13, 19, 13, 19, 13, 19, 13, 19, 20, 12, 20, 12, 20, 12, 20, 12, + 27, 5, 27, 5, 27, 5, 27, 5, 2, 30, 2, 30, 2, 30, 2, 30, + 9, 23, 9, 23, 9, 23, 9, 23, 16, 16, 16, 16, 16, 16, 16, 16, + 23, 9, 23, 9, 23, 9, 23, 9, 30, 2, 30, 2, 30, 2, 30, 2, + 5, 27, 5, 27, 5, 27, 5, 27, 12, 20, 12, 20, 12, 20, 12, 20, + 19, 13, 19, 13, 19, 13, 19, 13, 26, 6, 26, 6, 26, 6, 26, 6, + 1, 31, 1, 31, 1, 31, 1, 31, 8, 24, 8, 24, 8, 24, 8, 24, + 15, 17, 15, 17, 15, 17, 15, 17, 22, 10, 22, 10, 22, 10, 22, 10, + 29, 3, 29, 3, 29, 3, 29, 3, 4, 28, 4, 28, 4, 28, 4, 28, + 11, 21, 11, 21, 11, 21, 11, 21, 18, 14, 18, 14, 18, 14, 18, 14, + 25, 7, 25, 7, 25, 7, 25, 7, 32, 0, 32, 0, 32, 0, 32, 0, + 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6, // Mode -2 Offset 1344 + 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12, + 1, 31, 1, 31, 1, 31, 1, 31, 14, 18, 14, 18, 14, 18, 14, 18, + 27, 5, 27, 5, 27, 5, 27, 5, 8, 24, 8, 24, 8, 24, 8, 24, + 21, 11, 21, 11, 21, 11, 21, 11, 2, 30, 2, 30, 2, 30, 2, 30, + 15, 17, 15, 17, 15, 17, 15, 17, 28, 4, 28, 4, 28, 4, 28, 4, + 9, 23, 9, 23, 9, 23, 9, 23, 22, 10, 22, 10, 22, 10, 22, 10, + 3, 29, 3, 29, 3, 29, 3, 29, 16, 16, 16, 16, 16, 16, 16, 16, + 29, 3, 29, 3, 29, 3, 29, 3, 10, 22, 10, 22, 10, 22, 10, 22, + 23, 9, 23, 9, 23, 9, 23, 9, 4, 28, 4, 28, 4, 28, 4, 28, + 17, 15, 17, 15, 17, 15, 17, 15, 30, 2, 30, 2, 30, 2, 30, 2, + 11, 21, 11, 21, 11, 21, 11, 21, 24, 8, 24, 8, 24, 8, 24, 8, + 5, 27, 5, 27, 5, 27, 5, 27, 18, 14, 18, 14, 18, 14, 18, 14, + 31, 1, 31, 1, 31, 1, 31, 1, 12, 20, 12, 20, 12, 20, 12, 20, + 25, 7, 25, 7, 25, 7, 25, 7, 6, 26, 6, 26, 6, 26, 6, 26, + 19, 13, 19, 13, 19, 13, 19, 13, 32, 0, 32, 0, 32, 0, 32, 0, + 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26, // Mode -1 Offset 1600 + 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20, + 31, 1, 31, 1, 31, 1, 31, 1, 18, 14, 18, 14, 18, 14, 18, 14, + 5, 27, 5, 27, 5, 27, 5, 27, 24, 8, 24, 8, 24, 8, 24, 8, + 11, 21, 11, 21, 11, 21, 11, 21, 30, 2, 30, 2, 30, 2, 30, 2, + 17, 15, 17, 15, 17, 15, 17, 15, 4, 28, 4, 28, 4, 28, 4, 28, + 23, 9, 23, 9, 23, 9, 23, 9, 10, 22, 10, 22, 10, 22, 10, 22, + 29, 3, 29, 3, 29, 3, 29, 3, 16, 16, 16, 16, 16, 16, 16, 16, + 3, 29, 3, 29, 3, 29, 3, 29, 22, 10, 22, 10, 22, 10, 22, 10, + 9, 23, 9, 23, 9, 23, 9, 23, 28, 4, 28, 4, 28, 4, 28, 4, + 15, 17, 
15, 17, 15, 17, 15, 17, 2, 30, 2, 30, 2, 30, 2, 30, + 21, 11, 21, 11, 21, 11, 21, 11, 8, 24, 8, 24, 8, 24, 8, 24, + 27, 5, 27, 5, 27, 5, 27, 5, 14, 18, 14, 18, 14, 18, 14, 18, + 1, 31, 1, 31, 1, 31, 1, 31, 20, 12, 20, 12, 20, 12, 20, 12, + 7, 25, 7, 25, 7, 25, 7, 25, 26, 6, 26, 6, 26, 6, 26, 6, + 13, 19, 13, 19, 13, 19, 13, 19, 32, 0, 32, 0, 32, 0, 32, 0, + 25, 7, 25, 7, 25, 7, 25, 7, 18, 14, 18, 14, 18, 14, 18, 14, // Mode 0 Offset 1856 + 11, 21, 11, 21, 11, 21, 11, 21, 4, 28, 4, 28, 4, 28, 4, 28, + 29, 3, 29, 3, 29, 3, 29, 3, 22, 10, 22, 10, 22, 10, 22, 10, + 15, 17, 15, 17, 15, 17, 15, 17, 8, 24, 8, 24, 8, 24, 8, 24, + 1, 31, 1, 31, 1, 31, 1, 31, 26, 6, 26, 6, 26, 6, 26, 6, + 19, 13, 19, 13, 19, 13, 19, 13, 12, 20, 12, 20, 12, 20, 12, 20, + 5, 27, 5, 27, 5, 27, 5, 27, 30, 2, 30, 2, 30, 2, 30, 2, + 23, 9, 23, 9, 23, 9, 23, 9, 16, 16, 16, 16, 16, 16, 16, 16, + 9, 23, 9, 23, 9, 23, 9, 23, 2, 30, 2, 30, 2, 30, 2, 30, + 27, 5, 27, 5, 27, 5, 27, 5, 20, 12, 20, 12, 20, 12, 20, 12, + 13, 19, 13, 19, 13, 19, 13, 19, 6, 26, 6, 26, 6, 26, 6, 26, + 31, 1, 31, 1, 31, 1, 31, 1, 24, 8, 24, 8, 24, 8, 24, 8, + 17, 15, 17, 15, 17, 15, 17, 15, 10, 22, 10, 22, 10, 22, 10, 22, + 3, 29, 3, 29, 3, 29, 3, 29, 28, 4, 28, 4, 28, 4, 28, 4, + 21, 11, 21, 11, 21, 11, 21, 11, 14, 18, 14, 18, 14, 18, 14, 18, + 7, 25, 7, 25, 7, 25, 7, 25, 32, 0, 32, 0, 32, 0, 32, 0, + 29, 3, 29, 3, 29, 3, 29, 3, 26, 6, 26, 6, 26, 6, 26, 6, // Mode 1 Offset 2112 + 23, 9, 23, 9, 23, 9, 23, 9, 20, 12, 20, 12, 20, 12, 20, 12, + 17, 15, 17, 15, 17, 15, 17, 15, 14, 18, 14, 18, 14, 18, 14, 18, + 11, 21, 11, 21, 11, 21, 11, 21, 8, 24, 8, 24, 8, 24, 8, 24, + 5, 27, 5, 27, 5, 27, 5, 27, 2, 30, 2, 30, 2, 30, 2, 30, + 31, 1, 31, 1, 31, 1, 31, 1, 28, 4, 28, 4, 28, 4, 28, 4, + 25, 7, 25, 7, 25, 7, 25, 7, 22, 10, 22, 10, 22, 10, 22, 10, + 19, 13, 19, 13, 19, 13, 19, 13, 16, 16, 16, 16, 16, 16, 16, 16, + 13, 19, 13, 19, 13, 19, 13, 19, 10, 22, 10, 22, 10, 22, 10, 22, + 7, 25, 7, 25, 7, 25, 7, 25, 4, 28, 4, 28, 4, 28, 4, 28, + 1, 31, 1, 31, 1, 31, 1, 31, 30, 2, 30, 2, 30, 2, 30, 2, + 27, 5, 27, 5, 27, 5, 27, 5, 24, 8, 24, 8, 24, 8, 24, 8, + 21, 11, 21, 11, 21, 11, 21, 11, 18, 14, 18, 14, 18, 14, 18, 14, + 15, 17, 15, 17, 15, 17, 15, 17, 12, 20, 12, 20, 12, 20, 12, 20, + 9, 23, 9, 23, 9, 23, 9, 23, 6, 26, 6, 26, 6, 26, 6, 26, + 3, 29, 3, 29, 3, 29, 3, 29, 32, 0, 32, 0, 32, 0, 32, 0, +}; + + +// Chroma linear interpolation filter weights for width 4, horizontal wide angle modes. 
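+// The width 4 table below is kept for reference but commented out: the w16
+// horizontal wide angle table further down is a superset that the w4 and w8
+// functions can index into instead (see the NOTE preceding that table).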
+//ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w4_hor_wide_angle[] = {
+// 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -12
+// 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10, 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20, // Mode -11
+// 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -10
+// 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22, 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12, // Mode -9
+// 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -8
+// 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24, // Mode -7
+// 10, 22, 10, 22, 10, 22, 10, 22, 20, 12, 20, 12, 20, 12, 20, 12, 30, 2, 30, 2, 30, 2, 30, 2, 8, 24, 8, 24, 8, 24, 8, 24, // Mode -6
+// 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4, // Mode -5
+// 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -4
+// 7, 25, 7, 25, 7, 25, 7, 25, 14, 18, 14, 18, 14, 18, 14, 18, 21, 11, 21, 11, 21, 11, 21, 11, 28, 4, 28, 4, 28, 4, 28, 4, // Mode -3
+// 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6, 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12, // Mode -2
+// 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20, // Mode -1
+// 25, 7, 25, 7, 25, 7, 25, 7, 18, 14, 18, 14, 18, 14, 18, 14, 11, 21, 11, 21, 11, 21, 11, 21, 4, 28, 4, 28, 4, 28, 4, 28, // Mode 0
+// 29, 3, 29, 3, 29, 3, 29, 3, 26, 6, 26, 6, 26, 6, 26, 6, 23, 9, 23, 9, 23, 9, 23, 9, 20, 12, 20, 12, 20, 12, 20, 12, // Mode 1
+//};
+
+
+// NOTE: this table can also be used by the horizontal w4 and w8 wide angle functions, since their tables are just subsets of this one.
+// Chroma linear interpolation filter weights for width 16, horizontal wide angle modes.
+static ALIGNED(32) const int8_t intra_chroma_linear_interpolation_weights_w16_hor_wide_angle[] = { + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -12 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10, // Mode -11 + 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20, + 23, 9, 23, 9, 23, 9, 23, 9, 2, 30, 2, 30, 2, 30, 2, 30, + 13, 19, 13, 19, 13, 19, 13, 19, 24, 8, 24, 8, 24, 8, 24, 8, + 3, 29, 3, 29, 3, 29, 3, 29, 14, 18, 14, 18, 14, 18, 14, 18, + 25, 7, 25, 7, 25, 7, 25, 7, 4, 28, 4, 28, 4, 28, 4, 28, + 15, 17, 15, 17, 15, 17, 15, 17, 26, 6, 26, 6, 26, 6, 26, 6, + 5, 27, 5, 27, 5, 27, 5, 27, 16, 16, 16, 16, 16, 16, 16, 16, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -10 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22, // Mode -9 + 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12, + 9, 23, 9, 23, 9, 23, 9, 23, 30, 2, 30, 2, 30, 2, 30, 2, + 19, 13, 19, 13, 19, 13, 19, 13, 8, 24, 8, 24, 8, 24, 8, 24, + 29, 3, 29, 3, 29, 3, 29, 3, 18, 14, 18, 14, 18, 14, 18, 14, + 7, 25, 7, 25, 7, 25, 7, 25, 28, 4, 28, 4, 28, 4, 28, 4, + 17, 15, 17, 15, 17, 15, 17, 15, 6, 26, 6, 26, 6, 26, 6, 26, + 27, 5, 27, 5, 27, 5, 27, 5, 16, 16, 16, 16, 16, 16, 16, 16, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -8 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, // Mode -7 + 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24, + 2, 30, 2, 30, 2, 30, 2, 30, 28, 4, 28, 4, 28, 4, 28, 4, + 22, 10, 22, 10, 22, 10, 22, 10, 16, 16, 16, 16, 16, 16, 16, 16, + 10, 22, 10, 22, 10, 22, 10, 22, 4, 28, 4, 28, 4, 28, 4, 28, + 30, 2, 30, 2, 30, 2, 30, 2, 24, 8, 24, 8, 24, 8, 24, 8, + 18, 14, 18, 14, 18, 14, 18, 14, 12, 20, 12, 20, 12, 20, 12, 20, + 6, 26, 6, 26, 6, 26, 6, 26, 32, 0, 32, 0, 32, 0, 32, 0, + 10, 22, 10, 22, 10, 22, 10, 22, 20, 12, 20, 12, 20, 12, 20, 12, // Mode -6 + 30, 2, 30, 2, 30, 2, 30, 2, 8, 24, 8, 24, 8, 24, 8, 24, + 18, 14, 18, 14, 18, 14, 18, 14, 28, 4, 28, 4, 28, 4, 28, 4, + 6, 26, 6, 26, 6, 26, 6, 26, 16, 16, 16, 16, 16, 16, 16, 16, + 26, 6, 26, 6, 26, 6, 26, 6, 4, 28, 4, 28, 4, 28, 4, 28, + 14, 18, 14, 18, 14, 18, 14, 18, 24, 8, 24, 8, 24, 8, 24, 8, + 2, 30, 2, 30, 2, 30, 2, 30, 12, 20, 12, 20, 12, 20, 12, 20, + 22, 10, 22, 10, 22, 10, 22, 10, 32, 0, 32, 0, 32, 0, 32, 0, + 23, 9, 23, 
9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, // Mode -5 + 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4, + 19, 13, 19, 13, 19, 13, 19, 13, 10, 22, 10, 22, 10, 22, 10, 22, + 1, 31, 1, 31, 1, 31, 1, 31, 24, 8, 24, 8, 24, 8, 24, 8, + 15, 17, 15, 17, 15, 17, 15, 17, 6, 26, 6, 26, 6, 26, 6, 26, + 29, 3, 29, 3, 29, 3, 29, 3, 20, 12, 20, 12, 20, 12, 20, 12, + 11, 21, 11, 21, 11, 21, 11, 21, 2, 30, 2, 30, 2, 30, 2, 30, + 25, 7, 25, 7, 25, 7, 25, 7, 16, 16, 16, 16, 16, 16, 16, 16, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, // Mode -4 + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, + 7, 25, 7, 25, 7, 25, 7, 25, 14, 18, 14, 18, 14, 18, 14, 18, // Mode -3 + 21, 11, 21, 11, 21, 11, 21, 11, 28, 4, 28, 4, 28, 4, 28, 4, + 3, 29, 3, 29, 3, 29, 3, 29, 10, 22, 10, 22, 10, 22, 10, 22, + 17, 15, 17, 15, 17, 15, 17, 15, 24, 8, 24, 8, 24, 8, 24, 8, + 31, 1, 31, 1, 31, 1, 31, 1, 6, 26, 6, 26, 6, 26, 6, 26, + 13, 19, 13, 19, 13, 19, 13, 19, 20, 12, 20, 12, 20, 12, 20, 12, + 27, 5, 27, 5, 27, 5, 27, 5, 2, 30, 2, 30, 2, 30, 2, 30, + 9, 23, 9, 23, 9, 23, 9, 23, 16, 16, 16, 16, 16, 16, 16, 16, + 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6, // Mode -2 + 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12, + 1, 31, 1, 31, 1, 31, 1, 31, 14, 18, 14, 18, 14, 18, 14, 18, + 27, 5, 27, 5, 27, 5, 27, 5, 8, 24, 8, 24, 8, 24, 8, 24, + 21, 11, 21, 11, 21, 11, 21, 11, 2, 30, 2, 30, 2, 30, 2, 30, + 15, 17, 15, 17, 15, 17, 15, 17, 28, 4, 28, 4, 28, 4, 28, 4, + 9, 23, 9, 23, 9, 23, 9, 23, 22, 10, 22, 10, 22, 10, 22, 10, + 3, 29, 3, 29, 3, 29, 3, 29, 16, 16, 16, 16, 16, 16, 16, 16, + 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26, // Mode -1 + 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20, + 31, 1, 31, 1, 31, 1, 31, 1, 18, 14, 18, 14, 18, 14, 18, 14, + 5, 27, 5, 27, 5, 27, 5, 27, 24, 8, 24, 8, 24, 8, 24, 8, + 11, 21, 11, 21, 11, 21, 11, 21, 30, 2, 30, 2, 30, 2, 30, 2, + 17, 15, 17, 15, 17, 15, 17, 15, 4, 28, 4, 28, 4, 28, 4, 28, + 23, 9, 23, 9, 23, 9, 23, 9, 10, 22, 10, 22, 10, 22, 10, 22, + 29, 3, 29, 3, 29, 3, 29, 3, 16, 16, 16, 16, 16, 16, 16, 16, + 25, 7, 25, 7, 25, 7, 25, 7, 18, 14, 18, 14, 18, 14, 18, 14, // Mode 0 + 11, 21, 11, 21, 11, 21, 11, 21, 4, 28, 4, 28, 4, 28, 4, 28, + 29, 3, 29, 3, 29, 3, 29, 3, 22, 10, 22, 10, 22, 10, 22, 10, + 15, 17, 15, 17, 15, 17, 15, 17, 8, 24, 8, 24, 8, 24, 8, 24, + 1, 31, 1, 31, 1, 31, 1, 31, 26, 6, 26, 6, 26, 6, 26, 6, + 19, 13, 19, 13, 19, 13, 19, 13, 12, 20, 12, 20, 12, 20, 12, 20, + 5, 27, 5, 27, 5, 27, 5, 27, 30, 2, 30, 2, 30, 2, 30, 2, + 23, 9, 23, 9, 23, 9, 23, 9, 16, 16, 16, 16, 16, 16, 16, 16, + 29, 3, 29, 3, 29, 3, 29, 3, 26, 6, 26, 6, 26, 6, 26, 6, // Mode 1 + 23, 9, 23, 9, 23, 9, 23, 9, 20, 12, 20, 12, 20, 12, 20, 12, + 17, 15, 17, 15, 17, 15, 17, 15, 14, 18, 14, 18, 14, 18, 14, 18, + 11, 21, 11, 21, 11, 21, 11, 21, 8, 24, 8, 24, 8, 24, 8, 24, + 5, 27, 5, 27, 5, 27, 5, 27, 2, 30, 2, 30, 2, 30, 2, 30, + 31, 1, 31, 1, 31, 1, 31, 1, 28, 4, 28, 4, 28, 4, 28, 4, + 25, 7, 25, 7, 25, 7, 25, 7, 22, 10, 22, 10, 22, 10, 22, 10, + 19, 13, 19, 13, 19, 13, 19, 13, 16, 16, 16, 16, 16, 16, 16, 16, +}; + + +// Weights for intra pdpc w4 horizontal. 
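+// PDPC weight tables. The values follow w = 32 >> ((2 * pos) >> scale),
+// where pos is the pixel's distance from the block edge being blended and
+// scale is derived from the block size. They feed a blend of the angular
+// prediction with the opposite reference samples (scalar form, for
+// illustration):
+//
+//   out = (w * ref + (64 - w) * pred + 32) >> 6;
+//
+// so a weight of 0 leaves the angular prediction unchanged.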
+static ALIGNED(32) const int16_t intra_pdpc_w4_hor_weight[] = { + 32, 32, 32, 32, 8, 8, 8, 8, 2, 2, 2, 2, 0, 0, 0, 0, // Scale 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 32, 32, 32, 16, 16, 16, 16, 8, 8, 8, 8, 4, 4, 4, 4, // Scale 1 + 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 16, 16, 16, 16, 16, 16, 16, 16, // Scale 2 + 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +// Weights for intra pdpc w8 horizontal. +static ALIGNED(32) const int16_t intra_pdpc_w8_hor_weight[] = { + 32, 32, 32, 32, 32, 32, 32, 32, 8, 8, 8, 8, 8, 8, 8, 8, // Scale 0 + 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 16, 16, 16, 16, 16, 16, 16, 16, // Scale 1 + 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, // Scale 2 + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +// Weights for intra pdpc w4 vertical. +static ALIGNED(32) const int16_t intra_pdpc_w4_ver_weight[] = { + 32, 8, 2, 0, 32, 8, 2, 0, 32, 8, 2, 0, 32, 8, 2, 0, // Scale 0 + 32, 16, 8, 4, 32, 16, 8, 4, 32, 16, 8, 4, 32, 16, 8, 4, // Scale 1 + 32, 32, 16, 16, 32, 32, 16, 16, 32, 32, 16, 16, 32, 32, 16, 16, // Scale 2 +}; + + +// Weights for intra pdpc w8 vertical. +static ALIGNED(32) const int16_t intra_pdpc_w8_ver_weight[] = { + 32, 8, 2, 0, 0, 0, 0, 0, 32, 8, 2, 0, 0, 0, 0, 0, // Scale 0 + 32, 16, 8, 4, 2, 1, 0, 0, 32, 16, 8, 4, 2, 1, 0, 0, // Scale 1 + 32, 32, 16, 16, 8, 8, 4, 4, 32, 32, 16, 16, 8, 8, 4, 4, // Scale 2 +}; + + +// Weights for intra pdpc w16 vertical. +static ALIGNED(32) const int16_t intra_pdpc_w16_ver_weight[] = { + 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Scale 0 + 32, 16, 8, 4, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Scale 1 + 32, 32, 16, 16, 8, 8, 4, 4, 2, 2, 1, 1, 0, 0, 0, 0, // Scale 2 +}; + + +// Weights for improved PDPC + +// Weights for improved intra pdpc w4 vertical. +static ALIGNED(32) const uint8_t intra_pdpc_w4_ver_improved_weight[] = { + 32, 32, 56, 8, 62, 2, 64, 0, 32, 32, 56, 8, 62, 2, 64, 0, // Scale 0 + 32, 32, 48, 16, 56, 8, 60, 4, 32, 32, 48, 16, 56, 8, 60, 4, // Scale 1 + 32, 32, 32, 32, 48, 16, 48, 16, 32, 32, 32, 32, 48, 16, 48, 16, // Scale 2 +}; + +// Weights for improved intra pdpc w8 vertical. 
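+// As in the w4 table above, each entry is an interleaved (64 - w, w) byte
+// pair, which presumably lets the predicted and reference bytes be combined
+// with a single 8-bit multiply-add (e.g. _mm_maddubs_epi16) followed by
+// (+32) >> 6.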
+static ALIGNED(32) const uint8_t intra_pdpc_w8_ver_improved_weight[] = { + 32, 32, 56, 8, 62, 2, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 32, 32, 56, 8, 62, 2, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, // Scale 0 + 32, 32, 48, 16, 56, 8, 60, 4, 62, 2, 63, 1, 64, 0, 64, 0, 32, 32, 48, 16, 56, 8, 60, 4, 62, 2, 63, 1, 64, 0, 64, 0, // Scale 1 + 32, 32, 32, 32, 48, 16, 48, 16, 56, 8, 56, 8, 60, 4, 60, 4, 32, 32, 32, 32, 48, 16, 48, 16, 56, 8, 56, 8, 60, 4, 60, 4, // Scale 2 +}; + +// Weights for improved intra pdpc w16 vertical. +static ALIGNED(32) const uint8_t intra_pdpc_w16_ver_improved_weight[] = { + 32, 32, 56, 8, 62, 2, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, // Scale 0 + 32, 32, 48, 16, 56, 8, 60, 4, 62, 2, 63, 1, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, // Scale 1 + 32, 32, 32, 32, 48, 16, 48, 16, 56, 8, 56, 8, 60, 4, 60, 4, 62, 2, 62, 2, 63, 1, 63, 1, 64, 0, 64, 0, 64, 0, 64, 0, // Scale 2 +}; + + +// Weights for improved intra pdpc w4 horizontal. +static ALIGNED(32) const uint8_t intra_pdpc_w4_hor_improved_weight[] = { + 32, 32, 32, 32, 32, 32, 32, 32, 56, 8, 56, 8, 56, 8, 56, 8, 62, 2, 62, 2, 62, 2, 62, 2, 64, 0, 64, 0, 64, 0, 64, 0, // Scale 0 + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 48, 16, 48, 16, 48, 16, 48, 16, 56, 8, 56, 8, 56, 8, 56, 8, 60, 4, 60, 4, 60, 4, 60, 4, // Scale 1 + 62, 2, 62, 2, 62, 2, 62, 2, 63, 1, 63, 1, 63, 1, 63, 1, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, // Scale 2 + 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, + 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, +}; + +// Weights for improved intra pdpc w8 horizontal. 
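+// Horizontal layout: the (64 - w, w) pair of row y is repeated for all eight
+// pixels of the row, so each row's 16 bytes of weights can be read with one
+// aligned load.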
+static ALIGNED(32) const uint8_t intra_pdpc_w8_hor_improved_weight[] = { + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, // Scale 0 + 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, // Scale 1 + 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, + 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, // Scale 2 + 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, + 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, + 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, + 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, + 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, + 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, +}; + + +// Pre-calculated shifted inverse angle sums for pdpc for y- and x-values [0, 64]. Grouped by mode_disp. +// Index by y or x based on pdpc direction. 
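+// These values appear to follow the VVC inverse-angle derivation: with
+// inv_angle = round((32 << 9) / sample_disp) for the nonzero sample
+// displacement of each mode_disp, entry j is ((j + 1) * inv_angle + 256) >> 9.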
+static ALIGNED(32) const int16_t intra_pdpc_shifted_inv_angle_sum[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Mode disp 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, // Mode disp 1 +1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408, 1440, 1472, 1504, 1536, 1568, 1600, 1632, 1664, 1696, 1728, 1760, 1792, 1824, 1856, 1888, 1920, 1952, 1984, 2016, 2048, + 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496, 512, // Mode disp 2 + 528, 544, 560, 576, 592, 608, 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024, + 11, 21, 32, 43, 53, 64, 75, 85, 96, 107, 117, 128, 139, 149, 160, 171, 181, 192, 203, 213, 224, 235, 245, 256, 267, 277, 288, 299, 309, 320, 331, 341, // Mode disp 3 + 352, 363, 373, 384, 395, 405, 416, 427, 437, 448, 459, 469, 480, 491, 501, 512, 523, 533, 544, 555, 565, 576, 587, 597, 608, 619, 629, 640, 651, 661, 672, 683, + 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, // Mode disp 4 + 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 392, 400, 408, 416, 424, 432, 440, 448, 456, 464, 472, 480, 488, 496, 504, 512, + 5, 11, 16, 21, 27, 32, 37, 43, 48, 53, 59, 64, 69, 75, 80, 85, 91, 96, 101, 107, 112, 117, 123, 128, 133, 139, 144, 149, 155, 160, 165, 171, // Mode disp 5 + 176, 181, 187, 192, 197, 203, 208, 213, 219, 224, 229, 235, 240, 245, 251, 256, 261, 267, 272, 277, 283, 288, 293, 299, 304, 309, 315, 320, 325, 331, 336, 341, + 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, // Mode disp 6 + 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, + 3, 6, 10, 13, 16, 19, 22, 26, 29, 32, 35, 38, 42, 45, 48, 51, 54, 58, 61, 64, 67, 70, 74, 77, 80, 83, 86, 90, 93, 96, 99, 102, // Mode disp 7 + 106, 109, 112, 115, 118, 122, 125, 128, 131, 134, 138, 141, 144, 147, 150, 154, 157, 160, 163, 166, 170, 173, 176, 179, 182, 186, 189, 192, 195, 198, 202, 205, + 3, 5, 8, 11, 13, 16, 19, 21, 24, 27, 29, 32, 35, 37, 40, 43, 45, 48, 51, 53, 56, 59, 61, 64, 67, 69, 72, 75, 77, 80, 83, 85, // Mode disp 8 + 88, 91, 93, 96, 99, 101, 104, 107, 109, 112, 115, 117, 120, 123, 125, 128, 131, 133, 136, 139, 141, 144, 147, 149, 152, 155, 157, 160, 163, 165, 168, 171, + 2, 5, 7, 9, 11, 14, 16, 18, 21, 23, 25, 27, 30, 32, 34, 37, 39, 41, 43, 46, 48, 50, 53, 55, 57, 59, 62, 64, 66, 69, 71, 73, // Mode disp 9 + 75, 78, 80, 82, 85, 87, 89, 91, 94, 96, 98, 101, 103, 105, 107, 110, 112, 114, 117, 119, 121, 123, 126, 128, 130, 133, 135, 137, 139, 142, 144, 146, + 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, // Mode disp 10 + 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, + 2, 4, 5, 7, 9, 11, 12, 14, 16, 18, 20, 21, 23, 25, 27, 28, 30, 32, 34, 36, 37, 39, 41, 43, 
44, 46, 48, 50, 52, 53, 55, 57, // Mode disp 11 + 59, 60, 62, 64, 66, 68, 69, 71, 73, 75, 76, 78, 80, 82, 84, 85, 87, 89, 91, 92, 94, 96, 98, 100, 101, 103, 105, 107, 108, 110, 112, 114, + 2, 3, 5, 6, 8, 10, 11, 13, 14, 16, 18, 19, 21, 22, 24, 26, 27, 29, 30, 32, 34, 35, 37, 38, 40, 42, 43, 45, 46, 48, 50, 51, // Mode disp 12 + 53, 54, 56, 58, 59, 61, 62, 64, 66, 67, 69, 70, 72, 74, 75, 77, 78, 80, 82, 83, 85, 86, 88, 90, 91, 93, 94, 96, 98, 99, 101, 102, + 1, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 31, 32, 33, 35, 36, 38, 39, 40, 42, 43, 45, // Mode disp 13 + 46, 47, 49, 50, 51, 53, 54, 56, 57, 58, 60, 61, 63, 64, 65, 67, 68, 70, 71, 72, 74, 75, 76, 78, 79, 81, 82, 83, 85, 86, 88, 89, + 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 39, // Mode disp 14 + 41, 42, 43, 44, 46, 47, 48, 49, 50, 52, 53, 54, 55, 57, 58, 59, 60, 62, 63, 64, 65, 66, 68, 69, 70, 71, 73, 74, 75, 76, 78, 79, + 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, // Mode disp 15 + 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, // Mode disp 16 + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 27, 28, 29, // Mode disp 17 + 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 1, 2, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 11, 12, 13, 14, 15, 16, 16, 17, 18, 19, 20, 21, 21, 22, 23, 24, 25, 25, 26, // Mode disp 18 + 27, 28, 29, 30, 30, 31, 32, 33, 34, 34, 35, 36, 37, 38, 39, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, + 1, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18, 19, 20, 21, 21, 22, 23, // Mode disp 19 + 23, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38, 38, 39, 40, 41, 41, 42, 43, 43, 44, 45, 46, + 1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 14, 15, 16, 16, 17, 18, 18, 19, 19, 20, // Mode disp 20 + 21, 21, 22, 23, 23, 24, 24, 25, 26, 26, 27, 28, 28, 29, 29, 30, 31, 31, 32, 33, 33, 34, 34, 35, 36, 36, 37, 38, 38, 39, 39, 40, + 1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 15, 15, 16, 16, 17, 17, 18, // Mode disp 21 + 18, 19, 20, 20, 21, 21, 22, 22, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 32, 33, 33, 34, 34, 35, 35, 36, + 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, // Mode disp 22 + 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, + 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 13, 13, 14, 14, // Mode disp 23 + 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 19, 19, 20, 20, 21, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 25, 26, 26, 27, 27, 28, 28, + 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 12, 12, // Mode disp 24 + 12, 13, 13, 13, 14, 14, 15, 15, 15, 16, 16, 16, 17, 17, 18, 18, 18, 
19, 19, 19, 20, 20, 21, 21, 21, 22, 22, 22, 23, 23, 24, 24,
+ 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, // Mode disp 25
+ 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20, 20,
+ 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, // Mode disp 26
+ 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16,
+ 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, // Mode disp 27
+ 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12,
+ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, // Mode disp 28
+ 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, // Mode disp 29
+ 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
+ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, // Mode disp 30
+ 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // Mode disp 31
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+
+// TODO: prune this table. There are a ton of duplicates. Pruning may introduce some extra logic, but it will save a lot of space and probably speed up memory access.
+// NOTE: The vectors from this table can only be used from mode disp 6 upward. The reference samples are too sparse for vectorized shuffle below mode disp 6.
+// Shuffle vectors for w4 horizontal pdpc.
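+// Each 16-byte group below appears to hold reference-sample byte offsets for
+// four rows of a w4 block, presumably gathered with a byte shuffle such as
+// _mm_shuffle_epi8; from mode disp 6 upward the offsets fit one 16-byte load.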
+static ALIGNED(32) const int8_t intra_pdpc_shuffle_vectors_w4_hor[] = { + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, // Mode disp 0 + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, // Mode disp 1 + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 
0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x020, 0x021, 0x022, 0x023, 0x040, 0x041, 0x042, 0x043, 0x060, 0x061, 0x062, 0x063, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, // Mode disp 2 + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x010, 0x011, 0x012, 0x013, 0x020, 0x021, 0x022, 0x023, 0x030, 0x031, 0x032, 0x033, + 0x000, 0x001, 0x002, 0x003, 0x00a, 0x00b, 0x00c, 0x00d, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, // Mode disp 3 + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x016, 0x017, 0x018, 0x019, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00a, 0x00b, 0x00c, 0x00d, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x016, 0x017, 0x018, 0x019, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00a, 0x00b, 0x00c, 0x00d, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x016, 0x017, 0x018, 0x019, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00a, 0x00b, 0x00c, 0x00d, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x016, 0x017, 0x018, 0x019, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00a, 0x00b, 0x00c, 0x00d, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 
0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x016, 0x017, 0x018, 0x019, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00b, 0x00c, 0x00d, 0x00e, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x00a, 0x00b, 0x00c, 0x00d, 0x015, 0x016, 0x017, 0x018, 0x020, 0x021, 0x022, 0x023, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, // Mode disp 4 + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x008, 0x009, 0x00a, 0x00b, 0x010, 0x011, 0x012, 0x013, 0x018, 0x019, 0x01a, 0x01b, + 0x000, 0x001, 0x002, 0x003, 0x006, 0x007, 0x008, 0x009, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, // Mode disp 5 + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00a, 0x00b, 0x00c, 0x00d, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x006, 0x007, 0x008, 0x009, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00a, 0x00b, 0x00c, 0x00d, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x006, 0x007, 0x008, 0x009, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00a, 0x00b, 0x00c, 0x00d, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x006, 0x007, 0x008, 0x009, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00a, 0x00b, 0x00c, 0x00d, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 
0x000, 0x001, 0x002, 0x003, 0x006, 0x007, 0x008, 0x009, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00a, 0x00b, 0x00c, 0x00d, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x005, 0x006, 0x007, 0x008, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x006, 0x007, 0x008, 0x009, 0x00b, 0x00c, 0x00d, 0x00e, 0x010, 0x011, 0x012, 0x013, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, // Mode disp 6 + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 0x00d, // Mode disp 7 + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x009, 0x00a, 0x00b, 0x00c, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x009, 0x00a, 0x00b, 0x00c, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x009, 0x00a, 0x00b, 0x00c, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x009, 0x00a, 0x00b, 0x00c, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 
0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x009, 0x00a, 0x00b, 0x00c, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x009, 0x00a, 0x00b, 0x00c, + 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00a, 0x00a, 0x00b, 0x00c, 0x00d, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, // Mode disp 8 + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x006, 0x007, 0x008, 0x009, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x008, 0x009, 0x00a, 0x00b, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, // Mode disp 9 + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 
0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, 0x007, 0x008, 0x009, 0x00a, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, // Mode disp 10 + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, // Mode disp 11 + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 
0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x006, 0x007, 0x008, 0x009, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, // Mode disp 12 + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, // Mode disp 13 + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 
0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x005, 0x006, 0x007, 0x008, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, // Mode disp 14 + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, // Mode disp 15 + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 
0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x003, 0x004, 0x005, 0x006, 0x004, 0x005, 0x006, 0x007, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, // Mode disp 16 + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, // Mode disp 17 + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006, + 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, + 0x000, 0x001, 0x002, 0x003, 0x001, 
0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, // Mode disp 18
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, // Mode disp 19
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x003, 0x004, 0x005, 0x006,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, // Mode disp 20
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, // Mode disp 21
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, // Mode disp 22
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005, // Mode disp 23
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x002, 0x003, 0x004, 0x005,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, // Mode disp 24
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, // Mode disp 25
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, // Mode disp 26
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, // Mode disp 27
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, // Mode disp 28
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, // Mode disp 29
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, // Mode disp 30
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, // Mode disp 31
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x001, 0x002, 0x003, 0x004,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+ 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003, 0x000, 0x001, 0x002, 0x003,
+};
+
+// Shuffle vectors for w4 vertical pdpc.
+static ALIGNED(32) const uint8_t intra_pdpc_shuffle_vectors_w4_ver[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 0 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 1 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 2 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 3 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 4 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 5 | not used
+ 0x00, 0x04, 0x08, 0x0c, 0x01, 0x05, 0x09, 0x0d, 0x02, 0x06, 0x0a, 0x0e, 0x03, 0x07, 0x0b, 0x0f, // Mode disp 6
+ 0x00, 0x03, 0x07, 0x0a, 0x01, 0x04, 0x08, 0x0b, 0x02, 0x05, 0x09, 0x0c, 0x03, 0x06, 0x0a, 0x0d, // Mode disp 7
+ 0x00, 0x02, 0x05, 0x08, 0x01, 0x03, 0x06, 0x09, 0x02, 0x04, 0x07, 0x0a, 0x03, 0x05, 0x08, 0x0b, // Mode disp 8
+ 0x00, 0x03, 0x05, 0x07, 0x01, 0x04, 0x06, 0x08, 0x02, 0x05, 0x07, 0x09, 0x03, 0x06, 0x08, 0x0a, // Mode disp 9
+ 0x00, 0x02, 0x04, 0x06, 0x01, 0x03, 0x05, 0x07, 0x02, 0x04, 0x06, 0x08, 0x03, 0x05, 0x07, 0x09, // Mode disp 10
+ 0x00, 0x02, 0x03, 0x05, 0x01, 0x03, 0x04, 0x06, 0x02, 0x04, 0x05, 0x07, 0x03, 0x05, 0x06, 0x08, // Mode disp 11
+ 0x00, 0x01, 0x03, 0x04, 0x01, 0x02, 0x04, 0x05, 0x02, 0x03, 0x05, 0x06, 0x03, 0x04, 0x06, 0x07, // Mode disp 12
+ 0x00, 0x02, 0x03, 0x05, 0x01, 0x03, 0x04, 0x06, 0x02, 0x04, 0x05, 0x07, 0x03, 0x05, 0x06, 0x08, // Mode disp 13
+ 0x00, 0x01, 0x03, 0x04, 0x01, 0x02, 0x04, 0x05, 0x02, 0x03, 0x05, 0x06, 0x03, 0x04, 0x06, 0x07, // Mode disp 14
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, // Mode disp 15
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, // Mode disp 16
+ 0x00, 0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x04, 0x02, 0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x06, // Mode disp 17
+ 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode disp 18
+ 0x00, 0x00, 0x01, 0x02, 0x01, 0x01, 0x02, 0x03, 0x02, 0x02, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05, // Mode disp 19
+ 0x00, 0x00, 0x01, 0x02, 0x01, 0x01, 0x02, 0x03, 0x02, 0x02, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05, // Mode disp 20
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, // Mode disp 21
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, // Mode disp 22
+ 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, // Mode disp 23
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, // Mode disp 24
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, // Mode disp 25
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, // Mode disp 26
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, // Mode disp 27
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, // Mode disp 28
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, // Mode disp 29
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, // Mode disp 30
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, // Mode disp 31
+};
+
+// Shuffle vectors for 8x2 scale 1 vertical pdpc.
+static ALIGNED(32) const uint8_t intra_pdpc_shuffle_vectors_8x2_scale1_ver[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 0 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 1 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 2 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 3 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 4 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 5 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 6 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 7 | not used
+ 0x00, 0x02, 0x05, 0x08, 0x0a, 0x0d, 0xff, 0xff, 0x01, 0x03, 0x06, 0x09, 0x0b, 0x0e, 0xff, 0xff, // Mode disp 8
+ 0x00, 0x03, 0x05, 0x07, 0x09, 0x0c, 0xff, 0xff, 0x01, 0x04, 0x06, 0x08, 0x0a, 0x0d, 0xff, 0xff, // Mode disp 9
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0xff, 0xff, 0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0xff, 0xff, // Mode disp 10
+ 0x00, 0x02, 0x03, 0x05, 0x07, 0x09, 0xff, 0xff, 0x01, 0x03, 0x04, 0x06, 0x08, 0x0a, 0xff, 0xff, // Mode disp 11
+ 0x00, 0x01, 0x03, 0x04, 0x06, 0x08, 0xff, 0xff, 0x01, 0x02, 0x04, 0x05, 0x07, 0x09, 0xff, 0xff, // Mode disp 12
+ 0x00, 0x02, 0x03, 0x05, 0x06, 0x07, 0xff, 0xff, 0x01, 0x03, 0x04, 0x06, 0x07, 0x08, 0xff, 0xff, // Mode disp 13
+ 0x00, 0x01, 0x03, 0x04, 0x05, 0x06, 0xff, 0xff, 0x01, 0x02, 0x04, 0x05, 0x06, 0x07, 0xff, 0xff, // Mode disp 14
+ 0x00, 0x01, 0x02, 0x03, 0x05, 0x06, 0xff, 0xff, 0x01, 0x02, 0x03, 0x04, 0x06, 0x07, 0xff, 0xff, // Mode disp 15
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0xff, 0xff, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0xff, 0xff, // Mode disp 16
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0xff, 0xff, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0xff, 0xff, // Mode disp 17
+ 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0xff, 0xff, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0xff, 0xff, // Mode disp 18
+ 0x00, 0x00, 0x01, 0x02, 0x03, 0x03, 0xff, 0xff, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0xff, 0xff, // Mode disp 19
+ 0x00, 0x00, 0x01, 0x02, 0x02, 0x03, 0xff, 0xff, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0xff, 0xff, // Mode disp 20
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0xff, 0xff, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0xff, 0xff, // Mode disp 21
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0xff, 0xff, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0xff, 0xff, // Mode disp 22
+ 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0xff, 0xff, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0xff, 0xff, // Mode disp 23
+ 0x00, 0x01, 0x01, 0x01, 0x02, 0x02, 0xff, 0xff, 0x01, 0x02, 0x02, 0x02, 0x03, 0x03, 0xff, 0xff, // Mode disp 24
+ 0x00, 0x01, 0x01, 0x01, 0x02, 0x02, 0xff, 0xff, 0x01, 0x02, 0x02, 0x02, 0x03, 0x03, 0xff, 0xff, // Mode disp 25
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0xff, 0xff, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0xff, 0xff, // Mode disp 26
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0xff, 0xff, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0xff, 0xff, // Mode disp 27
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0xff, 0xff, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0xff, 0xff, // Mode disp 28
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0xff, 0xff, // Mode disp 29
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0xff, // Mode disp 30
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0xff, // Mode disp 31
+};
+
+// Shuffle vectors for 8x2 scale 2 vertical pdpc.
+static ALIGNED(32) const uint8_t intra_pdpc_shuffle_vectors_8x2_scale2_ver[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 0 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 1 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 2 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 3 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 4 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 5 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 6 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 7 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 8 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 9 | not used
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f, // Mode disp 10
+ 0x00, 0x02, 0x03, 0x05, 0x07, 0x09, 0x0a, 0x0c, 0x01, 0x03, 0x04, 0x06, 0x08, 0x0a, 0x0b, 0x0d, // Mode disp 11
+ 0x00, 0x01, 0x03, 0x04, 0x06, 0x08, 0x09, 0x0b, 0x01, 0x02, 0x04, 0x05, 0x07, 0x09, 0x0a, 0x0c, // Mode disp 12
+ 0x00, 0x02, 0x03, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x01, 0x03, 0x04, 0x06, 0x07, 0x08, 0x0a, 0x0b, // Mode disp 13
+ 0x00, 0x01, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x01, 0x02, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, // Mode disp 14
+ 0x00, 0x01, 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x01, 0x02, 0x03, 0x04, 0x06, 0x07, 0x08, 0x09, // Mode disp 15
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, // Mode disp 16
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, // Mode disp 17
+ 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x01, 0x02, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Mode disp 18
+ 0x00, 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x01, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, // Mode disp 19
+ 0x00, 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, // Mode disp 20
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, // Mode disp 21
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, // Mode disp 22
+ 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, // Mode disp 23
+ 0x00, 0x01, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x01, 0x02, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, // Mode disp 24
+ 0x00, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x04, // Mode disp 25
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, // Mode disp 26
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, // Mode disp 27
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, // Mode disp 28
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, // Mode disp 29
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, // Mode disp 30
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // Mode disp 31
+};
+
+// Shuffle vectors for w16 scale 2 vertical pdpc.
+static ALIGNED(32) const uint8_t intra_pdpc_shuffle_vectors_w16_scale2_ver[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 0 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 1 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 2 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 3 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 4 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 5 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 6 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 7 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 8 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 9 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 10 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 11 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 12 | not used
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // Mode disp 13 | not used
+ 0x00, 0x01, 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0xff, 0xff, 0xff, 0xff, // Mode disp 14
+ 0x00, 0x01, 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0xff, 0xff, 0xff, 0xff, // Mode disp 15
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, // Mode disp 16
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0xff, 0xff, 0xff, 0xff, // Mode disp 17
+ 0x00, 0x01, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, // Mode disp 18
+ 0x00, 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0xff, 0xff, 0xff, 0xff, // Mode disp 19
+ 0x00, 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05, 0x05, 0x06, 0x07, 0xff, 0xff, 0xff, 0xff, // Mode disp 20
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05, 0x05, 0x06, 0xff, 0xff, 0xff, 0xff, // Mode disp 21
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0xff, 0xff, 0xff, 0xff, // Mode disp 22
+ 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0xff, 0xff, 0xff, 0xff, // Mode disp 23
+ 0x00, 0x01, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0xff, 0xff, 0xff, 0xff, // Mode disp 24
+ 0x00, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0xff, 0xff, 0xff, 0xff, // Mode disp 25
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0xff, 0xff, 0xff, 0xff, // Mode disp 26
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0xff, 0xff, 0xff, 0xff, // Mode disp 27
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0xff, 0xff, 0xff, 0xff, // Mode disp 28
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, // Mode disp 29
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, // Mode disp 30
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, // Mode disp 31
+};
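+
+#if 0
+// Illustrative sketch only, not part of the patch: the PDPC shuffle rows above
+// are laid out for byte shuffles. Assuming 8-bit uvg_pixel and <immintrin.h>,
+// one 16-byte row per mode displacement gathers the left reference samples for
+// a batch of 4-wide prediction rows with a single pshufb; 0xff entries (the
+// "not used" lanes) zero the corresponding output byte. The helper name is
+// hypothetical.
+static inline __m128i pdpc_gather_left_w4(const uvg_pixel *ref_left, int mode_disp)
+{
+  const __m128i vref = _mm_loadu_si128((const __m128i *)ref_left);
+  const __m128i vidx = _mm_load_si128((const __m128i *)&intra_pdpc_shuffle_vectors_w4_ver[mode_disp * 16]);
+  return _mm_shuffle_epi8(vref, vidx);
+}
+#endif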
+
+// Intra ref building shuffle vector tables
+
+static ALIGNED(16) const uint8_t intra_refbuild_shuffle_vectors_sidesize_4[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 0
+ 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 1
+ 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 2
+ 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 3
+ 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 4
+ 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 5
+ 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 6
+ 0x04, 0x04, 0x04, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 7
+ 0x04, 0x04, 0x04, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 8
+ 0x04, 0x04, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 9
+ 0x04, 0x04, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 10
+ 0x04, 0x04, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 11
+ 0x04, 0x04, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 12
+ 0x04, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 13
+ 0x04, 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 14
+ 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 15
+ 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 16
+};
+
+static ALIGNED(16) const uint8_t intra_refbuild_shuffle_vectors_sidesize_8[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 0
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 1
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 2
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 3
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 4
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 5
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 6
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 7
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x05, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 8
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x05, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 9
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x06, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 10
+ 0x08, 0x08, 0x08, 0x08, 0x07, 0x05, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 11
+ 0x08, 0x08, 0x08, 0x08, 0x06, 0x05, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 12
+ 0x08, 0x08, 0x08, 0x07, 0x06, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 13
+ 0x08, 0x08, 0x07, 0x06, 0x05, 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 14
+ 0x08, 0x08, 0x07, 0x06, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 15
+ 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 16
+};
+
+static ALIGNED(16) const uint8_t intra_refbuild_shuffle_vectors_sidesize_16[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 0
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // mode disp 1
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // mode disp 2
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0a, // mode disp 3
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x07, // mode disp 4
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0a, 0x04, // mode disp 5
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0b, 0x07, 0x03, // mode disp 6
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0c, 0x09, 0x05, 0x02, // mode disp 7
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0c, 0x0a, 0x07, 0x04, 0x02, // mode disp 8
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0d, 0x0a, 0x08, 0x06, 0x04, 0x01, // mode disp 9
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, // mode disp 10
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x06, 0x04, 0x03, 0x01, // mode disp 11
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x05, 0x04, 0x02, 0x01, // mode disp 12
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06, 0x05, 0x03, 0x02, 0x00, // mode disp 13
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x09, 0x08, 0x06, 0x05, 0x04, 0x03, 0x01, 0x00, // mode disp 14
+ 0x0f, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x03, 0x02, 0x01, 0x00, // mode disp 15
+ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, // mode disp 16
+};
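+
+#if 0
+// Illustrative sketch only, not part of the patch: the ref building rows above
+// appear to hold, per mode displacement, the source indices of the side
+// reference samples that get projected onto the extended main reference, so
+// the projection can be done with one byte shuffle per 16 samples. A minimal
+// sketch for side size 16, assuming 8-bit uvg_pixel; the helper name is
+// hypothetical.
+static inline __m128i refbuild_project_side_16(const uvg_pixel *side_ref, int mode_disp)
+{
+  const __m128i vsrc = _mm_loadu_si128((const __m128i *)side_ref);
+  const __m128i vidx = _mm_load_si128((const __m128i *)&intra_refbuild_shuffle_vectors_sidesize_16[mode_disp * 16]);
+  return _mm_shuffle_epi8(vsrc, vidx);
+}
+#endif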
+
+
+// Y coord tables
+ALIGNED(32) static const int8_t planar_avx2_ver_w4ys[1024] = {
+ 63, 1, 63, 1, 63, 1, 63, 1, 62, 2, 62, 2, 62, 2, 62, 2, 61, 3, 61, 3, 61, 3, 61, 3, 60, 4, 60, 4, 60, 4, 60, 4, // offset 0, line == 64
+ 59, 5, 59, 5, 59, 5, 59, 5, 58, 6, 58, 6, 58, 6, 58, 6, 57, 7, 57, 7, 57, 7, 57, 7, 56, 8, 56, 8, 56, 8, 56, 8,
+ 55, 9, 55, 9, 55, 9, 55, 9, 54, 10, 54, 10, 54, 10, 54, 10, 53, 11, 53, 11, 53, 11, 53, 11, 52, 12, 52, 12, 52, 12, 52, 12,
+ 51, 13, 51, 13, 51, 13, 51, 13, 50, 14, 50, 14, 50, 14, 50, 14, 49, 15, 49, 15, 49, 15, 49, 15, 48, 16, 48, 16, 48, 16, 48, 16,
+ 47, 17, 47, 17, 47, 17, 47, 17, 46, 18, 46, 18, 46, 18, 46, 18, 45, 19, 45, 19, 45, 19, 45, 19, 44, 20, 44, 20, 44, 20, 44, 20,
+ 43, 21, 43, 21, 43, 21, 43, 21, 42, 22, 42, 22, 42, 22, 42, 22, 41, 23, 41, 23, 41, 23, 41, 23, 40, 24, 40, 24, 40, 24, 40, 24,
+ 39, 25, 39, 25, 39, 25, 39, 25, 38, 26, 38, 26, 38, 26, 38, 26, 37, 27, 37, 27, 37, 27, 37, 27, 36, 28, 36, 28, 36, 28, 36, 28,
+ 35, 29, 35, 29, 35, 29, 35, 29, 34, 30, 34, 30, 34, 30, 34, 30, 33, 31, 33, 31, 33, 31, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32,
+ 31, 33, 31, 33, 31, 33, 31, 33, 30, 34, 30, 34, 30, 34, 30, 34, 29, 35, 29, 35, 29, 35, 29, 35, 28, 36, 28, 36, 28, 36, 28, 36,
+ 27, 37, 27, 37, 27, 37, 27, 37, 26, 38, 26, 38, 26, 38, 26, 38, 25, 39, 25, 39, 25, 39, 25, 39, 24, 40, 24, 40, 24, 40, 24, 40,
+ 23, 41, 23, 41, 23, 41, 23, 41, 22, 42, 22, 42, 22, 42, 22, 42, 21, 43, 21, 43, 21, 43, 21, 43, 20, 44, 20, 44, 20, 44, 20, 44,
+ 19, 45, 19, 45, 19, 45, 19, 45, 18, 46, 18, 46, 18, 46, 18, 46, 17, 47, 17, 47, 17, 47, 17, 47, 16, 48, 16, 48, 16, 48, 16, 48,
+ 15, 49, 15, 49, 15, 49, 15, 49, 14, 50, 14, 50, 14, 50, 14, 50, 13, 51, 13, 51, 13, 51, 13, 51, 12, 52, 12, 52, 12, 52, 12, 52,
+ 11, 53, 11, 53, 11, 53, 11, 53, 10, 54, 10, 54, 10, 54, 10, 54, 9, 55, 9, 55, 9, 55, 9, 55, 8, 56, 8, 56, 8, 56, 8, 56,
+ 7, 57, 7, 57, 7, 57, 7, 57, 6, 58, 6, 58, 6, 58, 6, 58, 5, 59, 5, 59, 5, 59, 5, 59, 4, 60, 4, 60, 4, 60, 4, 60,
+ 3, 61, 3, 61, 3, 61, 3, 61, 2, 62, 2, 62, 2, 62, 2, 62, 1, 63, 1, 63, 1, 63, 1, 63, 0, 64, 0, 64, 0, 64, 0, 64,
+ 31, 1, 31, 1, 31, 1, 31, 1, 30, 2, 30, 2, 30, 2, 30, 2, 29, 3, 29, 3, 29, 3, 29, 3, 28, 4, 28, 4, 28, 4, 28, 4, // offset 16, line == 32
+ 27, 5, 27, 5, 27, 5, 27, 5, 26, 6, 26, 6, 26, 6, 26, 6, 25, 7, 25, 7, 25, 7, 25, 7, 24, 8, 24, 8, 24, 8, 24, 8,
+ 23, 9, 23, 9, 23, 9, 23, 9, 22, 10, 22, 10, 22, 10, 22, 10, 21, 11, 21, 11, 21, 11, 21, 11, 20, 12, 20, 12, 20, 12, 20, 12,
+ 19, 13, 19, 13, 19, 13, 19, 13, 18, 14, 18, 14, 18, 14, 18, 14, 17, 15, 17, 15, 17, 15, 17, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+ 15, 17, 15, 17, 15, 17, 15, 17, 14, 18, 14, 18, 14, 18, 14, 18, 13, 19, 13, 19, 13, 19, 13, 19, 12, 20, 12, 20, 12, 20, 12, 20,
+ 11, 21, 11, 21, 11, 21, 11, 21, 10, 22, 10, 22, 10, 22, 10, 22, 9, 23, 9, 23, 9, 23, 9, 23, 8, 24, 8, 24, 8, 24, 8, 24,
+ 7, 25, 7, 25, 7, 25, 7, 25, 6, 26, 6, 26, 6, 26, 6, 26, 5, 27, 5, 27, 5, 27, 5, 27, 4, 28, 4, 28, 4, 28, 4, 28,
+ 3, 29, 3, 29, 3, 29, 3, 29, 2, 30, 2, 30, 2, 30, 2, 30, 1, 31, 1, 31, 1, 31, 1, 31, 0, 32, 0, 32, 0, 32, 0, 32,
+ 15, 1, 15, 1, 15, 1, 15, 1, 14, 2, 14, 2, 14, 2, 14, 2, 13, 3, 13, 3, 13, 3, 13, 3, 12, 4, 12, 4, 12, 4, 12, 4, // offset 24, line == 16
+ 11, 5, 11, 5, 11, 5, 11, 5, 10, 6, 10, 6, 10, 6, 10, 6, 9, 7, 9, 7, 9, 7, 9, 7, 8, 8, 8, 8, 8, 8, 8, 8,
+ 7, 9, 7, 9, 7, 9, 7, 9, 6, 10, 6, 10, 6, 10, 6, 10, 5, 11, 5, 11, 5, 11, 5, 11, 4, 12, 4, 12, 4, 12, 4, 12,
+ 3, 13, 3, 13, 3, 13, 3, 13, 2, 14, 2, 14, 2, 14, 2, 14, 1, 15, 1, 15, 1, 15, 1, 15, 0, 16, 0, 16, 0, 16, 0, 16,
+ 7, 1, 7, 1, 7, 1, 7, 1, 6, 2, 6, 2, 6, 2, 6, 2, 5, 3, 5, 3, 5, 3, 5, 3, 4, 4, 4, 4, 4, 4, 4, 4, // offset 28, line == 8
+ 3, 5, 3, 5, 3, 5, 3, 5, 2, 6, 2, 6, 2, 6, 2, 6, 1, 7, 1, 7, 1, 7, 1, 7, 0, 8, 0, 8, 0, 8, 0, 8,
+ 3, 1, 3, 1, 3, 1, 3, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 1, 3, 1, 3, 1, 3, 0, 4, 0, 4, 0, 4, 0, 4, // offset 30, line == 4
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 0, 2, 0, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 0, 2, 0, 2, 0, 2, // offset 31, line == 2
+};
+
+ALIGNED(32) static const int8_t planar_avx2_ver_w8ys[2080] = {
+ 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, // offset 0, line == 64
+ 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4,
+ 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6,
+ 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8,
+ 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10,
+ 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12,
+ 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14,
+ 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16,
+ 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18,
+ 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20,
+ 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22,
+ 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24,
+ 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26,
+ 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28,
+ 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30,
+ 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 31, 33, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34, 30, 34,
+ 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 29, 35, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36, 28, 36,
+ 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 27, 37, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38, 26, 38,
+ 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 25, 39, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40, 24, 40,
+ 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 23, 41, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42, 22, 42,
+ 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 21, 43, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44, 20, 44,
+ 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 19, 45, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46, 18, 46,
+ 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 17, 47, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48, 16, 48,
+ 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 15, 49, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50, 14, 50,
+ 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 13, 51, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52, 12, 52,
+ 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 11, 53, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54, 10, 54,
+ 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 9, 55, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56, 8, 56,
+ 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 7, 57, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58, 6, 58,
+ 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 5, 59, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60, 4, 60,
+ 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62, 2, 62,
+ 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 1, 63, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64,
+ 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, // offset 32, line == 32
+ 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4,
+ 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6,
+ 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8,
+ 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10,
+ 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12,
+ 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14,
+ 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18,
+ 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20,
+ 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22,
+ 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24,
+ 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26,
+ 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28,
+ 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30,
+ 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32,
+ 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, // offset 48, line == 16
+ 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
+ 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
+ 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
+ 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
+ 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
+ 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16,
+ 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, // offset 56, line == 8
+ 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6, 2, 6,
+ 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8,
+ 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // offset 60, line == 4
+ 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // offset 62, line == 2
+ 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, // offset 64, line == 1, this might not be needed, ever
+};
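+
+#if 0
+// Illustrative sketch only, not part of the patch: the interleaved weight
+// pairs above are (line - 1 - y, y + 1), i.e. the vertical planar weights,
+// shaped so that one multiply-add per 32 bytes yields the 16-bit sums
+// (line - 1 - y) * above + (y + 1) * bottom_left. Assuming 8-bit uvg_pixel,
+// pixel bytes interleaved as (above, bottom_left), and coeff_row pointing into
+// one of the 32-byte-aligned tables above; the helper name is hypothetical.
+static inline __m256i planar_ver_madd(__m256i vpix_interleaved, const int8_t *coeff_row)
+{
+  const __m256i vcoef = _mm256_load_si256((const __m256i *)coeff_row);
+  // Unsigned pixels times signed coefficients, added horizontally in pairs.
+  return _mm256_maddubs_epi16(vpix_interleaved, vcoef);
+}
+#endif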
+
+
+// Delta int and delta fract tables. Rows are prediction modes, columns are y offsets (or x offsets for horizontal modes).
+// The tables are extended to allow dimensions up to 80 and are aligned to 32 bytes, which ensures the SIMD code cannot read outside a table.
+// This prevents errors when multi reference line is enabled: with MRL > 0, the table rows can be indexed with values larger than 64.
+// If the max dimensions change in the future, the tables can be regenerated with the new dimensions; the generation Python script can be found in the speed bench repository.
+// The same applies to the other delta tables.
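+
+#if 0
+// Illustrative sketch only, not part of the patch: each of the 33 rows of 80
+// entries (2640 = 33 * 80) follows from the usual 1/32-sample position mapping
+// pos = (y + 1) * angle, assuming the standard VVC angle steps
+// {32, 29, 26, 23, 20, 18, 16, 14, 12, 10, 8, 6, 4, 3, 2, 1, 0, -1, ...} and an
+// arithmetic right shift for negative angles; the helper name is hypothetical.
+static void generate_delta_int_row(int16_t row[80], int angle)
+{
+  for (int y = 0; y < 80; ++y) {
+    // Delta int; the matching delta fract entry would be ((y + 1) * angle) & 31.
+    row[y] = (int16_t)(((y + 1) * angle) >> 5);
+  }
+}
+#endif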
+ALIGNED(32) static const int16_t delta_int_table[2640] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, // 2 Diagonal mode
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72,
+ 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 13, 13, 14, 15, 16, 17, 17, 18, 19, 20, 21, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 30, 31, 32, 33, 34, 34, 35, 36, 37, 38, 39, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 47, 48, 49, 50, 51, 52, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65,
+ 0, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23, 23, 24, 25, 25, 26, 27, 28, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35, 35, 36, 37, 38, 38, 39, 40, 40, 41, 42, 43, 43, 44, 45, 46, 46, 47, 48, 48, 49, 50, 51, 51, 52, 53, 53, 54, 55, 56, 56, 57,
+ 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, 23, 23, 24, 25, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 31, 32, 33, 33, 34, 35, 35, 36, 36, 37, 38, 38, 39, 40, 40, 41, 41, 42, 43, 43, 44, 45, 45, 46, 46, 47, 48, 48, 49, 50, // 6
+ 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 15, 15, 16, 16, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 32, 32, 33, 33, 34, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 41, 41, 42, 42, 43, 43, 44, 45,
+ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40,
+ 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 14, 15, 15, 16, 16, 17, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 21, 22, 22, 23, 23, 24, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 33, 33, 34, 34, 35,
+ 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 15, 15, 15, 16, 16, 16, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 21, 21, 21, 22, 22, 22, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 30, // 10
+ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 22, 22, 22, 23, 23, 23, 24, 24, 24, 25,
+ 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15,
+ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, // 14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18 Pure horizontal or vertical mode
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -8, -8, -8, -8, -8, -8,
+ -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -5, -5, -5, -5, -6, -6, -6, -6, -6, -6, -6, -6, -7, -7, -7, -7, -7, -7, -7, -7, -8, -8, -8, -8, -8, -8, -8, -8, -9, -9, -9, -9, -9, -9, -9, -9, -10, -10, -10, -10, -10, -10, -10, -10, // 22
+ -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -5, -5, -5, -5, -5, -6, -6, -6, -6, -6, -6, -7, -7, -7, -7, -7, -8, -8, -8, -8, -8, -9, -9, -9, -9, -9, -9, -10, -10, -10, -10, -10, -11, -11, -11, -11, -11, -12, -12, -12, -12, -12, -12, -13, -13, -13, -13, -13, -14, -14, -14, -14, -14, -15, -15, -15, -15, -15, -15,
+ -1, -1, -1, -1, -2, -2, -2, -2, -3, -3, -3, -3, -4, -4, -4, -4, -5, -5, -5, -5, -6, -6, -6, -6, -7, -7, -7, -7, -8, -8, -8, -8, -9, -9, -9, -9, -10, -10, -10, -10, -11, -11, -11, -11, -12, -12, -12, -12, -13, -13, -13, -13, -14, -14, -14, -14, -15, -15, -15, -15, -16, -16, -16, -16, -17, -17, -17, -17, -18, -18, -18, -18, -19, -19, -19, -19, -20, -20, -20, -20,
+ -1, -1, -1, -2, -2, -2, -3, -3, -3, -4, -4, -4, -5, -5, -5, -5, -6, -6, -6, -7, -7, -7, -8, -8, -8, -9, -9, -9, -10, -10, -10, -10, -11, -11,
-11, -12, -12, -12, -13, -13, -13, -14, -14, -14, -15, -15, -15, -15, -16, -16, -16, -17, -17, -17, -18, -18, -18, -19, -19, -19, -20, -20, -20, -20, -21, -21, -21, -22, -22, -22, -23, -23, -23, -24, -24, -24, -25, -25, -25, -25, + -1, -1, -2, -2, -2, -3, -3, -3, -4, -4, -5, -5, -5, -6, -6, -6, -7, -7, -8, -8, -8, -9, -9, -9, -10, -10, -11, -11, -11, -12, -12, -12, -13, -13, -14, -14, -14, -15, -15, -15, -16, -16, -17, -17, -17, -18, -18, -18, -19, -19, -20, -20, -20, -21, -21, -21, -22, -22, -23, -23, -23, -24, -24, -24, -25, -25, -26, -26, -26, -27, -27, -27, -28, -28, -29, -29, -29, -30, -30, -30, // 26 + -1, -1, -2, -2, -3, -3, -4, -4, -4, -5, -5, -6, -6, -7, -7, -7, -8, -8, -9, -9, -10, -10, -11, -11, -11, -12, -12, -13, -13, -14, -14, -14, -15, -15, -16, -16, -17, -17, -18, -18, -18, -19, -19, -20, -20, -21, -21, -21, -22, -22, -23, -23, -24, -24, -25, -25, -25, -26, -26, -27, -27, -28, -28, -28, -29, -29, -30, -30, -31, -31, -32, -32, -32, -33, -33, -34, -34, -35, -35, -35, + -1, -1, -2, -2, -3, -3, -4, -4, -5, -5, -6, -6, -7, -7, -8, -8, -9, -9, -10, -10, -11, -11, -12, -12, -13, -13, -14, -14, -15, -15, -16, -16, -17, -17, -18, -18, -19, -19, -20, -20, -21, -21, -22, -22, -23, -23, -24, -24, -25, -25, -26, -26, -27, -27, -28, -28, -29, -29, -30, -30, -31, -31, -32, -32, -33, -33, -34, -34, -35, -35, -36, -36, -37, -37, -38, -38, -39, -39, -40, -40, + -1, -2, -2, -3, -3, -4, -4, -5, -6, -6, -7, -7, -8, -8, -9, -9, -10, -11, -11, -12, -12, -13, -13, -14, -15, -15, -16, -16, -17, -17, -18, -18, -19, -20, -20, -21, -21, -22, -22, -23, -24, -24, -25, -25, -26, -26, -27, -27, -28, -29, -29, -30, -30, -31, -31, -32, -33, -33, -34, -34, -35, -35, -36, -36, -37, -38, -38, -39, -39, -40, -40, -41, -42, -42, -43, -43, -44, -44, -45, -45, + -1, -2, -2, -3, -4, -4, -5, -5, -6, -7, -7, -8, -9, -9, -10, -10, -11, -12, -12, -13, -14, -14, -15, -15, -16, -17, -17, -18, -19, -19, -20, -20, -21, -22, -22, -23, -24, -24, -25, -25, -26, -27, -27, -28, -29, -29, -30, -30, -31, -32, -32, -33, -34, -34, -35, -35, -36, -37, -37, -38, -39, -39, -40, -40, -41, -42, -42, -43, -44, -44, -45, -45, -46, -47, -47, -48, -49, -49, -50, -50, // 30 + -1, -2, -3, -3, -4, -5, -6, -6, -7, -8, -8, -9, -10, -11, -11, -12, -13, -13, -14, -15, -16, -16, -17, -18, -18, -19, -20, -21, -21, -22, -23, -23, -24, -25, -26, -26, -27, -28, -29, -29, -30, -31, -31, -32, -33, -34, -34, -35, -36, -36, -37, -38, -39, -39, -40, -41, -41, -42, -43, -44, -44, -45, -46, -46, -47, -48, -49, -49, -50, -51, -52, -52, -53, -54, -54, -55, -56, -57, -57, -58, + -1, -2, -3, -4, -5, -5, -6, -7, -8, -9, -9, -10, -11, -12, -13, -13, -14, -15, -16, -17, -18, -18, -19, -20, -21, -22, -22, -23, -24, -25, -26, -26, -27, -28, -29, -30, -31, -31, -32, -33, -34, -35, -35, -36, -37, -38, -39, -39, -40, -41, -42, -43, -44, -44, -45, -46, -47, -48, -48, -49, -50, -51, -52, -52, -53, -54, -55, -56, -57, -57, -58, -59, -60, -61, -61, -62, -63, -64, -65, -65, + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -68, -69, -70, -71, -72, -73, + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, 
-46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -69, -70, -71, -72, -73, -74, -75, -76, -77, -78, -79, -80, // 34 Diagonal mode +}; + + +// OPTIONAL TODO: This table can be cut to 32 width, the second 32 width half (and all repeating 32 item chunks) is identical to the first. For easy access, leave the table as is, otherwise some modulo operations are necessary. +ALIGNED(32) static const int16_t delta_fract_table[2640] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 Diagonal mode +29, 26, 23, 20, 17, 14, 11, 8, 5, 2, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2, 31, 28, 25, 22, 19, 16, +26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, +23, 14, 5, 28, 19, 10, 1, 24, 15, 6, 29, 20, 11, 2, 25, 16, 7, 30, 21, 12, 3, 26, 17, 8, 31, 22, 13, 4, 27, 18, 9, 0, 23, 14, 5, 28, 19, 10, 1, 24, 15, 6, 29, 20, 11, 2, 25, 16, 7, 30, 21, 12, 3, 26, 17, 8, 31, 22, 13, 4, 27, 18, 9, 0, 23, 14, 5, 28, 19, 10, 1, 24, 15, 6, 29, 20, 11, 2, 25, 16, +20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, // 6 +18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, +16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, +14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, +12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, // 10 +10, 20, 30, 8, 18, 28, 6, 16, 26, 4, 14, 24, 2, 12, 22, 0, 10, 20, 30, 8, 18, 28, 6, 16, 26, 4, 14, 24, 2, 12, 22, 0, 10, 20, 30, 8, 18, 28, 6, 16, 26, 4, 14, 24, 2, 12, 22, 0, 10, 20, 30, 8, 18, 28, 6, 16, 26, 4, 14, 24, 2, 12, 22, 0, 10, 20, 30, 8, 18, 28, 6, 16, 26, 4, 14, 24, 2, 12, 22, 0, + 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 
16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, + 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, + 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28, 0, // 14 + 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, + 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18 Pure horizontal or vertical mode +31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, +30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, +29, 26, 23, 20, 17, 14, 11, 8, 5, 2, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2, 31, 28, 25, 22, 19, 16, +28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, 28, 24, 20, 16, 12, 8, 4, 0, // 22 +26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, 26, 20, 14, 8, 2, 28, 22, 16, 10, 4, 30, 24, 18, 12, 6, 0, +24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 
0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, 24, 16, 8, 0, +22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, +20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, 20, 8, 28, 16, 4, 24, 12, 0, // 26 +18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, 18, 4, 22, 8, 26, 12, 30, 16, 2, 20, 6, 24, 10, 28, 14, 0, +16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, +14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, 14, 28, 10, 24, 6, 20, 2, 16, 30, 12, 26, 8, 22, 4, 18, 0, +12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, 12, 24, 4, 16, 28, 8, 20, 0, // 30 + 9, 18, 27, 4, 13, 22, 31, 8, 17, 26, 3, 12, 21, 30, 7, 16, 25, 2, 11, 20, 29, 6, 15, 24, 1, 10, 19, 28, 5, 14, 23, 0, 9, 18, 27, 4, 13, 22, 31, 8, 17, 26, 3, 12, 21, 30, 7, 16, 25, 2, 11, 20, 29, 6, 15, 24, 1, 10, 19, 28, 5, 14, 23, 0, 9, 18, 27, 4, 13, 22, 31, 8, 17, 26, 3, 12, 21, 30, 7, 16, + 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, + 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 34 Diagonal mode +}; + + +// Delta int and delta fract wide angle tables. Rows are corrected prediction mode, columns y offset. 
(or x offset for horizontal modes) +ALIGNED(32) static const int16_t delta_int_wide_angle_table[1200] = { + 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408, 1440, 1472, 1504, 1536, 1568, 1600, 1632, 1664, 1696, 1728, 1760, 1792, 1824, 1856, 1888, 1920, 1952, 1984, 2016, 2048, 2080, 2112, 2144, 2176, 2208, 2240, 2272, 2304, 2336, 2368, 2400, 2432, 2464, 2496, 2528, 2560, // -13 Non-fractional angle + 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608, 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024, 1040, 1056, 1072, 1088, 1104, 1120, 1136, 1152, 1168, 1184, 1200, 1216, 1232, 1248, 1264, 1280, // -12 Non-fractional angle + 10, 21, 31, 42, 53, 63, 74, 85, 95, 106, 117, 127, 138, 149, 159, 170, 181, 191, 202, 213, 223, 234, 245, 255, 266, 277, 287, 298, 309, 319, 330, 341, 351, 362, 372, 383, 394, 404, 415, 426, 436, 447, 458, 468, 479, 490, 500, 511, 522, 532, 543, 554, 564, 575, 586, 596, 607, 618, 628, 639, 650, 660, 671, 682, 692, 703, 713, 724, 735, 745, 756, 767, 777, 788, 799, 809, 820, 831, 841, 852, // -11 + 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 392, 400, 408, 416, 424, 432, 440, 448, 456, 464, 472, 480, 488, 496, 504, 512, 520, 528, 536, 544, 552, 560, 568, 576, 584, 592, 600, 608, 616, 624, 632, 640, // -10 Non-fractional angle + 5, 10, 16, 21, 26, 32, 37, 42, 48, 53, 58, 64, 69, 74, 80, 85, 90, 96, 101, 106, 112, 117, 122, 128, 133, 138, 144, 149, 154, 160, 165, 171, 176, 181, 187, 192, 197, 203, 208, 213, 219, 224, 229, 235, 240, 245, 251, 256, 261, 267, 272, 277, 283, 288, 293, 299, 304, 309, 315, 320, 325, 331, 336, 342, 347, 352, 358, 363, 368, 374, 379, 384, 390, 395, 400, 406, 411, 416, 422, 427, // -9 + 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300, 304, 308, 312, 316, 320, // -8 Non-fractional angle + 3, 6, 9, 12, 15, 19, 22, 25, 28, 31, 35, 38, 41, 44, 47, 51, 54, 57, 60, 63, 66, 70, 73, 76, 79, 82, 86, 89, 92, 95, 98, 102, 105, 108, 111, 114, 117, 121, 124, 127, 130, 133, 137, 140, 143, 146, 149, 153, 156, 159, 162, 165, 168, 172, 175, 178, 181, 184, 188, 191, 194, 197, 200, 204, 207, 210, 213, 216, 219, 223, 226, 229, 232, 235, 239, 242, 245, 248, 251, 255, // -7 + 2, 5, 8, 10, 13, 16, 18, 21, 24, 26, 29, 32, 34, 37, 40, 43, 45, 48, 51, 53, 56, 59, 61, 64, 67, 69, 72, 75, 77, 80, 83, 86, 88, 91, 94, 96, 99, 102, 104, 107, 110, 112, 115, 118, 120, 123, 126, 129, 131, 134, 137, 139, 142, 145, 147, 150, 153, 155, 158, 161, 163, 166, 169, 172, 174, 177, 180, 182, 185, 188, 190, 193, 196, 198, 201, 204, 206, 209, 212, 215, // -6 + 2, 4, 6, 9, 11, 13, 15, 18, 20, 22, 25, 27, 29, 31, 34, 36, 38, 41, 43, 45, 47, 50, 52, 54, 57, 59, 61, 63, 66, 68, 70, 73, 75, 77, 79, 82, 84, 86, 88, 91, 93, 95, 98, 100, 102, 
104, 107, 109, 111, 114, 116, 118, 120, 123, 125, 127, 130, 132, 134, 136, 139, 141, 143, 146, 148, 150, 152, 155, 157, 159, 161, 164, 166, 168, 171, 173, 175, 177, 180, 182, // -5 + 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, // -4 Non-fractional angle + 1, 3, 5, 7, 8, 10, 12, 14, 16, 17, 19, 21, 23, 24, 26, 28, 30, 32, 33, 35, 37, 39, 40, 42, 44, 46, 48, 49, 51, 53, 55, 57, 58, 60, 62, 64, 65, 67, 69, 71, 73, 74, 76, 78, 80, 81, 83, 85, 87, 89, 90, 92, 94, 96, 97, 99, 101, 103, 105, 106, 108, 110, 112, 114, 115, 117, 119, 121, 122, 124, 126, 128, 130, 131, 133, 135, 137, 138, 140, 142, // -3 + 1, 3, 4, 6, 7, 9, 11, 12, 14, 15, 17, 19, 20, 22, 23, 25, 27, 28, 30, 31, 33, 35, 36, 38, 39, 41, 43, 44, 46, 47, 49, 51, 52, 54, 55, 57, 58, 60, 62, 63, 65, 66, 68, 70, 71, 73, 74, 76, 78, 79, 81, 82, 84, 86, 87, 89, 90, 92, 94, 95, 97, 98, 100, 102, 103, 105, 106, 108, 109, 111, 113, 114, 116, 117, 119, 121, 122, 124, 125, 127, // -2 + 1, 2, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 18, 19, 21, 22, 23, 25, 26, 28, 29, 30, 32, 33, 35, 36, 37, 39, 40, 42, 43, 45, 46, 47, 49, 50, 52, 53, 54, 56, 57, 59, 60, 61, 63, 64, 66, 67, 68, 70, 71, 73, 74, 75, 77, 78, 80, 81, 82, 84, 85, 87, 88, 90, 91, 92, 94, 95, 97, 98, 99, 101, 102, 104, 105, 106, 108, 109, 111, 112, // -1 + 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 17, 18, 19, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 52, 53, 54, 56, 57, 58, 59, 60, 62, 63, 64, 65, 67, 68, 69, 70, 71, 73, 74, 75, 76, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 90, 91, 92, 93, 95, 96, 97, // 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 83, 84, 85, 86, 87, // 1 +}; + +// OPTIONAL TODO: This table can be cut to 32 width, the second 32 width half (and all repeating 32 item chunks) is identical to the first. For easy access, leave the table as is, otherwise some modulo operations are necessary. 
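+// Editor's sketch (hypothetical helper, not used by the encoder): if the fract
+// tables were cut down to their 32 unique columns as the TODO above suggests,
+// the lookup would have to fold the column index. Because the repeat period is
+// a power of two, the modulo reduces to a mask. Assumed reduced layout: 32
+// entries per row, rows in the same order as the full table below.
+static inline int16_t uvg_delta_fract_folded(const int16_t *reduced_table,
+                                             int mode_row, int col)
+{
+  // col & 31 == col % 32 for non-negative col.
+  return reduced_table[mode_row * 32 + (col & 31)];
+}
+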
+ALIGNED(32) static const int16_t delta_fract_wide_angle_table[1200] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // -13 Non-fractional angle + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // -12 Non-fractional angle +21, 10, 31, 20, 9, 30, 19, 8, 29, 18, 7, 28, 17, 6, 27, 16, 5, 26, 15, 4, 25, 14, 3, 24, 13, 2, 23, 12, 1, 22, 11, 0, 21, 10, 31, 20, 9, 30, 19, 8, 29, 18, 7, 28, 17, 6, 27, 16, 5, 26, 15, 4, 25, 14, 3, 24, 13, 2, 23, 12, 1, 22, 11, 0, 21, 10, 31, 20, 9, 30, 19, 8, 29, 18, 7, 28, 17, 6, 27, 16, // -11 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // -10 Non-fractional angle +11, 22, 1, 12, 23, 2, 13, 24, 3, 14, 25, 4, 15, 26, 5, 16, 27, 6, 17, 28, 7, 18, 29, 8, 19, 30, 9, 20, 31, 10, 21, 0, 11, 22, 1, 12, 23, 2, 13, 24, 3, 14, 25, 4, 15, 26, 5, 16, 27, 6, 17, 28, 7, 18, 29, 8, 19, 30, 9, 20, 31, 10, 21, 0, 11, 22, 1, 12, 23, 2, 13, 24, 3, 14, 25, 4, 15, 26, 5, 16, // -9 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // -8 Non-fractional angle + 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, 6, 12, 18, 24, 30, 4, 10, 16, 22, 28, 2, 8, 14, 20, 26, 0, // -7 +22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10, 0, // -6 + 9, 18, 27, 4, 13, 22, 31, 8, 17, 26, 3, 12, 21, 30, 7, 16, 25, 2, 11, 20, 29, 6, 15, 24, 1, 10, 19, 28, 5, 14, 23, 0, 9, 18, 27, 4, 13, 22, 31, 8, 17, 26, 3, 12, 21, 30, 7, 16, 25, 2, 11, 20, 29, 6, 15, 24, 1, 10, 19, 28, 5, 14, 23, 0, 9, 18, 27, 4, 13, 22, 31, 8, 17, 26, 3, 12, 21, 30, 7, 16, // -5 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // -4 Non-fractional angle +25, 18, 11, 4, 29, 22, 15, 8, 1, 26, 19, 12, 5, 30, 23, 16, 9, 2, 27, 20, 13, 6, 31, 24, 17, 10, 3, 28, 21, 14, 7, 0, 25, 18, 11, 4, 29, 22, 15, 8, 1, 26, 19, 12, 5, 30, 23, 16, 9, 2, 27, 20, 13, 6, 31, 24, 17, 10, 3, 28, 21, 14, 7, 0, 25, 18, 11, 4, 29, 22, 15, 8, 1, 26, 19, 12, 5, 30, 23, 16, // -3 +19, 6, 25, 12, 31, 18, 5, 24, 11, 30, 17, 4, 23, 10, 29, 16, 3, 22, 9, 28, 15, 2, 21, 8, 27, 14, 1, 20, 7, 26, 13, 0, 19, 6, 25, 12, 31, 18, 5, 24, 11, 30, 17, 4, 23, 10, 29, 16, 3, 22, 9, 28, 15, 2, 21, 8, 27, 14, 1, 20, 7, 26, 13, 0, 19, 6, 25, 12, 31, 18, 5, 24, 11, 30, 17, 4, 23, 10, 29, 16, // -2 +13, 26, 7, 
20, 1, 14, 27, 8, 21, 2, 15, 28, 9, 22, 3, 16, 29, 10, 23, 4, 17, 30, 11, 24, 5, 18, 31, 12, 25, 6, 19, 0, 13, 26, 7, 20, 1, 14, 27, 8, 21, 2, 15, 28, 9, 22, 3, 16, 29, 10, 23, 4, 17, 30, 11, 24, 5, 18, 31, 12, 25, 6, 19, 0, 13, 26, 7, 20, 1, 14, 27, 8, 21, 2, 15, 28, 9, 22, 3, 16, // -1 + 7, 14, 21, 28, 3, 10, 17, 24, 31, 6, 13, 20, 27, 2, 9, 16, 23, 30, 5, 12, 19, 26, 1, 8, 15, 22, 29, 4, 11, 18, 25, 0, 7, 14, 21, 28, 3, 10, 17, 24, 31, 6, 13, 20, 27, 2, 9, 16, 23, 30, 5, 12, 19, 26, 1, 8, 15, 22, 29, 4, 11, 18, 25, 0, 7, 14, 21, 28, 3, 10, 17, 24, 31, 6, 13, 20, 27, 2, 9, 16, // 0 + 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, // 1 +}; + +static ALIGNED(16) const uint8_t intra_refbuild_shuffle_vectors_sidesize_32[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 0 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // mode disp 1 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x0f, // mode disp 2 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x14, 0x0a, // mode disp 3 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x17, 0x0f, 0x07, // mode disp 4 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1a, 0x14, 0x0f, 0x0a, 0x04, // mode disp 5 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1b, 0x17, 0x13, 0x0f, 0x0b, 0x07, 0x03, // mode disp 6 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1c, 0x19, 0x15, 0x12, 0x0f, 0x0c, 0x09, 0x05, 0x02, // mode disp 7 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1c, 0x1a, 0x17, 0x14, 0x12, 0x0f, 0x0c, 0x0a, 0x07, 0x04, 0x02, // mode disp 8 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1d, 0x1a, 0x18, 0x16, 0x14, 0x11, 0x0f, 0x0d, 0x0a, 0x08, 0x06, 0x04, 0x01, // mode disp 9 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, // mode disp 10 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1d, 0x1b, 0x1a, 0x18, 0x16, 0x14, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x06, 0x04, 0x03, 0x01, // mode disp 11 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 
0x1f, 0x1f, 0x1f, 0x1d, 0x1c, 0x1a, 0x19, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x05, 0x04, 0x02, 0x01, // mode disp 12 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1e, 0x1c, 0x1b, 0x19, 0x18, 0x17, 0x15, 0x14, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06, 0x05, 0x03, 0x02, 0x00, // mode disp 13 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1e, 0x1d, 0x1b, 0x1a, 0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x09, 0x08, 0x06, 0x05, 0x04, 0x03, 0x01, 0x00, // mode disp 14 + 0x1f, 0x1f, 0x1f, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x03, 0x02, 0x01, 0x00, // mode disp 15 + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, // mode disp 16 +}; + +static ALIGNED(16) const uint8_t intra_refbuild_shuffle_vectors_sidesize_64[] = { + //<-v0----------------------------------------------------------------------------------------->||<-v1----------------------------------------------------------------------------------------->||<-v2----------------------------------------------------------------------------------------->||<-v3-----------------------------------------------------------------------------------------> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mode disp 0 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x1f, // mode disp 1 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x2f, 0x1f, 0x0f, // mode disp 2 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x34, 0x2a, 0x1f, 0x14, 0x0a, // mode disp 3 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x37, 0x2f, 0x27, 0x1f, 0x17, 0x0f, 0x07, // mode disp 4 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3a, 0x34, 0x2f, 0x2a, 0x24, 0x1f, 0x1a, 0x14, 0x0f, 0x0a, 0x04, // mode disp 5 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3b, 0x37, 0x33, 0x2f, 0x2b, 0x27, 0x23, 0x1f, 0x1b, 0x17, 0x13, 0x0f, 0x0b, 0x07, 0x03, // mode disp 6 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3c, 0x39, 0x35, 0x32, 0x2f, 0x2c, 0x29, 0x25, 0x22, 0x1f, 0x1c, 0x19, 0x15, 0x12, 0x0f, 0x0c, 0x09, 0x05, 0x02, // mode disp 7 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3c, 0x3a, 0x37, 0x34, 0x32, 0x2f, 0x2c, 0x2a, 0x27, 0x24, 0x22, 0x1f, 0x1c, 0x1a, 0x17, 0x14, 0x12, 0x0f, 0x0c, 0x0a, 0x07, 0x04, 0x02, // mode disp 8 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3d, 0x3a, 0x38, 0x36, 0x34, 0x31, 0x2f, 0x2d, 0x2a, 0x28, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1a, 0x18, 0x16, 0x14, 0x11, 0x0f, 0x0d, 0x0a, 0x08, 0x06, 0x04, 0x01, // mode disp 9 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3d, 0x3b, 0x39, 0x37, 0x35, 0x33, 0x31, 0x2f, 0x2d, 0x2b, 0x29, 0x27, 0x25, 0x23, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, // mode disp 10 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3d, 0x3b, 0x3a, 0x38, 0x36, 0x34, 0x33, 0x31, 0x2f, 0x2d, 0x2b, 0x2a, 0x28, 0x26, 0x24, 0x23, 0x21, 0x1f, 0x1d, 0x1b, 0x1a, 0x18, 0x16, 0x14, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x06, 0x04, 0x03, 0x01, // mode disp 11 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3d, 0x3c, 0x3a, 0x39, 0x37, 0x35, 0x34, 0x32, 0x31, 0x2f, 0x2d, 0x2c, 0x2a, 0x29, 0x27, 0x25, 0x24, 0x22, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, 0x19, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x05, 0x04, 0x02, 0x01, // mode disp 12 + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3e, 0x3c, 0x3b, 0x39, 0x38, 0x37, 0x35, 0x34, 0x32, 0x31, 0x30, 0x2e, 0x2d, 0x2c, 0x2a, 0x29, 0x27, 0x26, 0x25, 0x23, 0x22, 0x20, 0x1f, 0x1e, 0x1c, 0x1b, 0x19, 0x18, 0x17, 0x15, 0x14, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06, 0x05, 0x03, 0x02, 0x00, // mode disp 13 + 0x3f, 0x3f, 0x3f, 0x3f, 
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3e, 0x3d, 0x3b, 0x3a, 0x39, 0x38, 0x36, 0x35, 0x34, 0x33, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2b, 0x2a, 0x29, 0x28, 0x26, 0x25, 0x24, 0x23, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1b, 0x1a, 0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x09, 0x08, 0x06, 0x05, 0x04, 0x03, 0x01, 0x00, // mode disp 14
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x03, 0x02, 0x01, 0x00, // mode disp 15
+ 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, // mode disp 16
+ //<-v0----------------------------------------------------------------------------------------->||<-v1----------------------------------------------------------------------------------------->||<-v2----------------------------------------------------------------------------------------->||<-v3----------------------------------------------------------------------------------------->
+};
+
+#endif // INTRA_AVX2_TABLES_H
diff --git a/src/strategies/avx2/mip_data_avx2.h b/src/strategies/avx2/mip_data_avx2.h
new file mode 100644
index 00000000..342b1b0c
--- /dev/null
+++ b/src/strategies/avx2/mip_data_avx2.h
@@ -0,0 +1,604 @@
+/*****************************************************************************
+ * This file is part of uvg266 VVC encoder.
+ *
+ * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or
+ *   other materials provided with the distribution.
+ *
+ * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ****************************************************************************/
+
+ /**
+* \ingroup Reconstruction
+* \file
+* MIP weight matrix data.
+*/
+
+/** \file MipData.h
+\brief weight and bias data for matrix-based intra prediction (MIP)
+*/
+#pragma once
+
+#define MIP_SHIFT_MATRIX 6
+#define MIP_OFFSET_MATRIX 32
+// MIP weight tables for AVX2.
+
+// This is the same table as used in the generic version, but in 16-bit form.
+static ALIGNED(32) const uint16_t uvg_mip_sid0_weights[16][16][4] =
+{
+  {
+    { 32, 30, 90, 28},
+    { 32, 32, 72, 28},
+    { 34, 77, 53, 30},
+    { 51, 124, 36, 37},
+    { 31, 31, 95, 37},
+    { 33, 31, 70, 50},
+    { 52, 80, 25, 60},
+    { 78, 107, 1, 65},
+    { 31, 29, 37, 95},
+    { 38, 34, 19, 101},
+    { 73, 85, 0, 81},
+    { 92, 99, 0, 65},
+    { 34, 29, 14, 111},
+    { 48, 48, 7, 100},
+    { 80, 91, 0, 74},
+    { 89, 97, 0, 64}
+  },
+  {
+    { 31, 23, 34, 29},
+    { 31, 43, 34, 31},
+    { 30, 95, 34, 32},
+    { 29, 100, 35, 33},
+    { 31, 23, 34, 29},
+    { 31, 43, 34, 31},
+    { 30, 95, 34, 32},
+    { 29, 99, 35, 33},
+    { 31, 24, 35, 29},
+    { 31, 44, 34, 31},
+    { 30, 95, 35, 32},
+    { 29, 99, 35, 33},
+    { 31, 24, 35, 30},
+    { 31, 44, 35, 31},
+    { 30, 95, 35, 32},
+    { 29, 99, 35, 33}
+  },
+  {
+    { 32, 32, 36, 58},
+    { 32, 29, 26, 66},
+    { 36, 37, 23, 61},
+    { 79, 84, 3, 37},
+    { 32, 32, 30, 69},
+    { 33, 29, 24, 71},
+    { 44, 16, 21, 70},
+    { 96, 18, 0, 57},
+    { 32, 31, 24, 74},
+    { 33, 30, 23, 71},
+    { 36, 24, 24, 71},
+    { 59, 9, 16, 68},
+    { 32, 32, 23, 75},
+    { 33, 30, 24, 70},
+    { 32, 30, 25, 71},
+    { 36, 26, 25, 70}
+  },
+  {
+    { 32, 33, 34, 32},
+    { 32, 30, 22, 38},
+    { 29, 46, 25, 38},
+    { 53, 123, 28, 22},
+    { 32, 33, 30, 37},
+    { 32, 30, 21, 38},
+    { 32, 40, 24, 38},
+    { 64, 116, 26, 17},
+    { 32, 32, 23, 49},
+    { 32, 30, 21, 39},
+    { 34, 39, 24, 37},
+    { 72, 109, 23, 16},
+    { 33, 31, 17, 60},
+    { 32, 31, 21, 39},
+    { 35, 41, 24, 37},
+    { 72, 106, 22, 18}
+  },
+  {
+    { 34, 25, 89, 20},
+    { 38, 32, 47, 24},
+    { 40, 86, 29, 27},
+    { 38, 98, 32, 29},
+    { 34, 31, 94, 40},
+    { 44, 25, 83, 27},
+    { 54, 72, 43, 16},
+    { 47, 94, 33, 22},
+    { 33, 31, 36, 94},
+    { 43, 23, 51, 76},
+    { 62, 55, 64, 25},
+    { 57, 89, 38, 15},
+    { 32, 32, 28, 101},
+    { 38, 26, 33, 94},
+    { 55, 38, 68, 47},
+    { 59, 80, 52, 16}
+  },
+  {
+    { 28, 30, 68, 29},
+    { 23, 48, 23, 48},
+    { 39, 98, 16, 42},
+    { 84, 86, 20, 17},
+    { 25, 31, 52, 74},
+    { 38, 68, 5, 70},
+    { 95, 78, 7, 21},
+    { 127, 54, 12, 0},
+    { 30, 47, 14, 107},
+    { 79, 76, 0, 53},
+    { 127, 59, 7, 1},
+    { 127, 51, 9, 0},
+    { 50, 71, 1, 96},
+    { 109, 69, 7, 25},
+    { 127, 56, 9, 0},
+    { 123, 53, 13, 0}
+  },
+  {
+    { 40, 20, 72, 18},
+    { 48, 29, 44, 18},
+    { 53, 81, 35, 18},
+    { 48, 96, 33, 22},
+    { 45, 23, 79, 49},
+    { 61, 21, 56, 49},
+    { 72, 52, 32, 48},
+    { 65, 69, 20, 50},
+    { 41, 27, 29, 96},
+    { 49, 22, 28, 94},
+    { 52, 22, 28, 93},
+    { 49, 27, 27, 92},
+    { 37, 29, 26, 98},
+    { 39, 28, 28, 97},
+    { 38, 28, 30, 97},
+    { 38, 29, 30, 95}
+  },
+  {
+    { 33, 27, 43, 27},
+    { 32, 29, 31, 31},
+    { 31, 73, 33, 31},
+    { 35, 104, 34, 28},
+    { 32, 30, 63, 22},
+    { 33, 26, 33, 29},
+    { 33, 57, 33, 30},
+    { 37, 100, 35, 27},
+    { 32, 31, 85, 25},
+    { 34, 25, 39, 25},
+    { 35, 39, 32, 28},
+    { 40, 91, 35, 25},
+    { 32, 30, 77, 50},
+    { 34, 26, 54, 22},
+    { 37, 31, 34, 27},
+    { 45, 75, 34, 23}
+  },
+  {
+    { 34, 25, 77, 19},
+    { 36, 34, 56, 24},
+    { 41, 83, 39, 30},
+    { 47, 96, 28, 35},
+    { 34, 31, 70, 65},
+    { 38, 29, 53, 77},
+    { 43, 36, 37, 83},
+    { 48, 39, 28, 83},
+    { 33, 31, 31, 98},
+    { 33, 31, 30, 99},
+    { 34, 30, 
31, 98}, + { 36, 29, 31, 96}, + { 32, 32, 30, 97}, + { 32, 32, 31, 96}, + { 31, 33, 33, 96}, + { 32, 33, 34, 94} + }, + { + { 30, 30, 93, 19}, + { 31, 59, 67, 34}, + { 31, 79, 36, 59}, + { 30, 67, 17, 79}, + { 30, 38, 68, 69}, + { 29, 40, 43, 91}, + { 26, 35, 32, 101}, + { 23, 32, 30, 101}, + { 26, 34, 30, 101}, + { 23, 33, 30, 102}, + { 20, 32, 31, 102}, + { 18, 33, 32, 102}, + { 23, 33, 31, 100}, + { 20, 34, 32, 100}, + { 18, 35, 33, 100}, + { 18, 35, 33, 100} + }, + { + { 31, 54, 90, 26}, + { 32, 60, 53, 61}, + { 34, 49, 37, 84}, + { 34, 39, 35, 89}, + { 35, 38, 41, 88}, + { 35, 35, 32, 96}, + { 35, 31, 33, 96}, + { 35, 32, 35, 94}, + { 34, 34, 30, 97}, + { 35, 32, 33, 95}, + { 35, 32, 34, 94}, + { 35, 34, 34, 93}, + { 34, 34, 34, 93}, + { 35, 34, 34, 93}, + { 35, 34, 34, 92}, + { 36, 34, 35, 91} + }, + { + { 32, 29, 54, 24}, + { 31, 32, 34, 29}, + { 31, 43, 34, 29}, + { 32, 67, 36, 28}, + { 31, 34, 69, 37}, + { 31, 35, 46, 33}, + { 30, 35, 39, 33}, + { 30, 42, 39, 36}, + { 31, 35, 39, 88}, + { 30, 38, 41, 84}, + { 30, 39, 40, 81}, + { 39, 46, 38, 78}, + { 31, 36, 34, 96}, + { 34, 38, 37, 93}, + { 55, 42, 38, 82}, + { 89, 53, 38, 65} + }, + { + { 32, 33, 43, 29}, + { 32, 30, 29, 33}, + { 31, 47, 31, 33}, + { 33, 100, 31, 31}, + { 32, 33, 74, 25}, + { 32, 32, 34, 31}, + { 32, 33, 30, 33}, + { 32, 68, 30, 32}, + { 32, 31, 91, 40}, + { 32, 32, 58, 26}, + { 31, 31, 30, 32}, + { 31, 42, 30, 33}, + { 32, 31, 49, 85}, + { 32, 31, 83, 35}, + { 31, 33, 48, 29}, + { 31, 36, 32, 33} + }, + { + { 31, 29, 81, 35}, + { 32, 28, 34, 50}, + { 31, 75, 16, 43}, + { 34, 103, 29, 32}, + { 32, 32, 53, 78}, + { 31, 28, 36, 88}, + { 30, 52, 18, 73}, + { 52, 88, 17, 35}, + { 32, 32, 35, 94}, + { 30, 31, 35, 95}, + { 36, 29, 31, 92}, + { 100, 43, 16, 40}, + { 32, 32, 35, 93}, + { 30, 32, 38, 93}, + { 55, 18, 37, 83}, + { 127, 0, 30, 40} + }, + { + { 31, 22, 47, 30}, + { 31, 48, 25, 34}, + { 30, 95, 31, 32}, + { 32, 103, 33, 32}, + { 30, 24, 57, 31}, + { 30, 47, 26, 34}, + { 31, 95, 31, 32}, + { 43, 97, 35, 25}, + { 29, 26, 44, 63}, + { 37, 38, 24, 47}, + { 74, 63, 28, 20}, + { 110, 58, 34, 3}, + { 46, 22, 5, 108}, + { 93, 5, 9, 77}, + { 127, 0, 17, 52}, + { 127, 0, 15, 50} + }, + { + { 32, 27, 68, 24}, + { 35, 23, 35, 28}, + { 35, 64, 29, 29}, + { 37, 104, 33, 28}, + { 32, 32, 91, 40}, + { 36, 23, 67, 36}, + { 49, 23, 39, 28}, + { 60, 67, 30, 20}, + { 32, 32, 36, 95}, + { 35, 29, 38, 93}, + { 50, 16, 30, 84}, + { 72, 16, 15, 65}, + { 32, 32, 27, 100}, + { 33, 32, 29, 100}, + { 37, 29, 30, 98}, + { 48, 21, 29, 90} + } +}; + +// Weight vectors for MIP size_id 1. 
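+// Editor's note (hedged): judging by the per-mode "offset" comments below,
+// each mode occupies 8 rows of 16 entries, i.e. 128 uint16 values
+// (16 reduced-prediction outputs x 8 boundary inputs for size_id 1). The
+// weights appear to be interleaved in pairs so that one 32-bit lane carries
+// the two values combined by a single _mm256_madd_epi16. A mode's weights
+// would then start at (hypothetical pointer name):
+//
+//   const uint16_t *w = &uvg_mip_sid1_weights[mip_mode * 128];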
+static ALIGNED(32) const uint16_t uvg_mip_sid1_weights[] = { + 30, 63, 30, 60, 29, 45, 30, 39, 46, 37, 66, 38, 74, 42, 62, 58, // mode 0, offset 0 + 25, 33, 32, 31, 32, 32, 32, 33, 33, 34, 32, 33, 32, 33, 32, 33, + 30, 66, 29, 54, 28, 48, 28, 41, 55, 39, 69, 40, 71, 43, 72, 46, + 32, 30, 33, 31, 32, 33, 32, 34, 30, 36, 31, 33, 32, 33, 32, 33, + 30, 66, 29, 55, 27, 46, 27, 42, 56, 40, 69, 39, 72, 43, 69, 48, + 32, 33, 33, 33, 33, 33, 32, 34, 28, 33, 30, 32, 32, 33, 32, 33, + 30, 63, 29, 56, 27, 47, 27, 42, 55, 40, 66, 40, 69, 44, 65, 50, + 32, 33, 33, 33, 33, 33, 32, 34, 35, 30, 33, 30, 33, 32, 32, 33, + 32, 33, 33, 56, 33, 77, 33, 37, 30, 31, 28, 30, 52, 26, 80, 41, // mode 1, offset 128 + 74, 30, 41, 29, 29, 34, 31, 34, 31, 32, 32, 32, 30, 32, 30, 32, + 32, 32, 33, 31, 33, 47, 33, 61, 33, 31, 31, 30, 28, 29, 44, 28, + 59, 76, 78, 40, 53, 27, 34, 32, 28, 31, 28, 32, 31, 31, 31, 31, + 32, 31, 32, 31, 33, 27, 33, 33, 34, 30, 34, 29, 34, 29, 34, 30, + 26, 64, 45, 86, 73, 55, 62, 33, 76, 27, 36, 29, 25, 32, 30, 31, + 32, 31, 32, 31, 32, 30, 33, 28, 34, 30, 35, 29, 36, 29, 37, 30, + 30, 29, 27, 53, 40, 80, 58, 60, 58, 74, 77, 35, 44, 31, 31, 33, + 32, 51, 32, 95, 32, 27, 32, 34, 27, 32, 42, 29, 99, 34, 21, 104, // mode 2, offset 256 + 27, 50, 29, 42, 31, 41, 31, 42, 29, 32, 30, 32, 29, 32, 30, 32, + 32, 45, 32, 77, 32, 38, 32, 30, 30, 32, 38, 30, 78, 33, 30, 87, + 9, 88, 9, 76, 14, 67, 20, 59, 40, 30, 38, 30, 37, 30, 38, 31, + 33, 37, 34, 44, 36, 39, 37, 31, 32, 32, 34, 31, 45, 31, 31, 54, + 27, 18, 25, 17, 24, 15, 25, 14, 106, 34, 108, 31, 108, 30, 101, 32, + 36, 33, 39, 32, 44, 33, 47, 30, 32, 30, 32, 29, 31, 27, 31, 32, + 29, 37, 27, 37, 25, 37, 25, 34, 13, 110, 15, 108, 16, 106, 19, 102, + 32, 48, 32, 33, 32, 29, 33, 33, 35, 35, 59, 40, 47, 65, 31, 81, // mode 3, offset 384 + 47, 68, 27, 71, 24, 62, 26, 50, 31, 31, 33, 30, 37, 30, 42, 32, + 32, 30, 32, 20, 33, 30, 36, 34, 40, 38, 46, 50, 29, 66, 27, 69, + 30, 70, 26, 55, 25, 41, 26, 31, 55, 31, 64, 31, 72, 33, 67, 39, + 33, 28, 36, 27, 43, 30, 51, 27, 36, 40, 33, 50, 26, 57, 28, 55, + 30, 26, 31, 20, 28, 17, 22, 23, 85, 47, 79, 53, 67, 62, 49, 70, + 38, 29, 51, 31, 69, 23, 77, 13, 32, 39, 28, 43, 30, 40, 35, 38, + 28, 30, 24, 31, 15, 38, 8, 43, 22, 104, 17, 102, 10, 95, 8, 90, + 32, 38, 32, 40, 32, 37, 33, 34, 32, 33, 37, 32, 46, 35, 30, 62, // mode 4, offset 512 +101, 40, 100, 36, 94, 33, 81, 35, 29, 32, 30, 32, 30, 31, 30, 31, + 32, 32, 32, 31, 33, 33, 33, 32, 33, 32, 33, 33, 33, 33, 34, 36, + 22, 102, 26, 104, 31, 103, 37, 94, 39, 29, 34, 28, 32, 28, 33, 28, + 32, 33, 32, 34, 33, 33, 33, 33, 32, 32, 33, 33, 34, 33, 33, 36, + 34, 24, 33, 30, 31, 37, 30, 46, 99, 36, 98, 32, 95, 29, 85, 31, + 32, 33, 32, 34, 32, 33, 33, 33, 32, 33, 33, 33, 34, 34, 32, 37, + 30, 34, 31, 32, 31, 29, 32, 30, 23, 104, 30, 98, 39, 91, 47, 82, + 32, 52, 33, 19, 33, 30, 34, 35, 48, 31, 62, 50, 20, 74, 23, 56, // mode 5, offset 640 + 38, 76, 25, 50, 29, 29, 31, 25, 26, 32, 51, 31, 54, 51, 41, 76, + 33, 25, 35, 28, 37, 35, 38, 32, 38, 39, 25, 47, 22, 38, 33, 29, + 28, 39, 31, 23, 31, 27, 30, 31, 83, 35, 57, 74, 30, 101, 27, 103, + 34, 32, 38, 33, 40, 32, 40, 32, 27, 37, 28, 32, 33, 27, 34, 27, + 32, 25, 30, 31, 29, 33, 28, 33, 41, 92, 18, 111, 18, 111, 23, 105, + 35, 32, 38, 31, 40, 32, 40, 32, 30, 33, 33, 30, 33, 29, 33, 30, + 31, 33, 29, 33, 29, 34, 29, 34, 20, 107, 21, 106, 22, 105, 24, 101, + 32, 28, 33, 30, 33, 60, 33, 63, 31, 33, 28, 33, 26, 33, 44, 36, // mode 6, offset 768 + 92, 33, 71, 26, 47, 28, 37, 31, 30, 31, 32, 30, 33, 30, 33, 30, + 33, 30, 33, 28, 
33, 30, 33, 38, 31, 33, 29, 34, 26, 33, 29, 32, + 43, 90, 71, 71, 86, 45, 74, 32, 33, 29, 26, 30, 28, 30, 33, 29, + 33, 32, 34, 31, 34, 31, 33, 32, 30, 32, 29, 33, 29, 33, 28, 34, + 29, 41, 26, 71, 37, 88, 55, 75, 95, 27, 73, 22, 46, 25, 36, 28, + 34, 31, 35, 32, 34, 33, 34, 34, 30, 32, 28, 33, 28, 33, 28, 34, + 33, 27, 33, 23, 30, 35, 33, 53, 43, 89, 77, 59, 91, 37, 74, 31, + 33, 49, 33, 71, 32, 23, 31, 33, 26, 32, 72, 24, 70, 68, 21, 106, // mode 7, offset 896 + 26, 52, 30, 32, 32, 32, 33, 32, 28, 31, 34, 31, 32, 32, 32, 33, + 34, 47, 34, 44, 32, 27, 30, 33, 32, 29, 89, 28, 46, 89, 20, 107, + 5, 86, 28, 37, 33, 31, 33, 33, 44, 26, 33, 30, 31, 32, 32, 33, + 35, 39, 34, 27, 31, 31, 29, 32, 42, 27, 87, 43, 32, 100, 22, 106, + 26, 24, 30, 34, 32, 33, 33, 33, 92, 35, 38, 31, 30, 32, 32, 33, + 35, 29, 34, 24, 31, 33, 29, 33, 47, 32, 69, 60, 31, 99, 25, 103, + 32, 32, 34, 33, 32, 33, 33, 33, 17, 100, 28, 44, 32, 31, 32, 35, +}; + + +// Weight vectors for MIP size_id 2. +static ALIGNED(32) const uint16_t uvg_mip_sid2_weights[] = { + 0, 42, 0, 71, 0, 77, 0, 64, 37, 33, 39, 34, 46, 35, 60, 35, // mode 0, offset 0 + 27, 44, 24, 36, 33, 30, 33, 31, 33, 35, 35, 36, 34, 36, 32, 36, + 0, 49, 0, 42, 0, 40, 0, 38, 71, 38, 66, 50, 52, 67, 43, 75, + 32, 32, 33, 31, 33, 31, 33, 32, 31, 36, 32, 36, 32, 35, 32, 35, + 0, 56, 0, 70, 0, 65, 0, 59, 40, 33, 49, 34, 57, 36, 60, 39, + 26, 43, 30, 28, 34, 28, 33, 30, 38, 36, 38, 38, 33, 39, 31, 38, + 0, 55, 0, 51, 0, 46, 0, 42, 60, 43, 61, 47, 62, 51, 60, 55, + 33, 30, 33, 30, 34, 30, 33, 31, 31, 38, 32, 37, 32, 37, 32, 37, + 0, 60, 0, 68, 0, 62, 0, 58, 42, 34, 52, 35, 58, 37, 59, 41, + 30, 37, 35, 22, 34, 28, 33, 30, 43, 38, 37, 40, 31, 40, 30, 39, + 0, 56, 0, 53, 0, 49, 0, 45, 59, 44, 60, 45, 65, 45, 64, 47, + 34, 30, 33, 30, 33, 30, 33, 31, 31, 38, 31, 38, 31, 38, 32, 38, + 0, 59, 0, 66, 0, 61, 0, 59, 44, 35, 53, 36, 58, 38, 57, 41, + 31, 34, 35, 25, 34, 29, 33, 30, 43, 41, 31, 43, 30, 40, 31, 39, + 0, 57, 0, 54, 0, 51, 0, 48, 58, 43, 61, 43, 64, 43, 64, 45, + 33, 30, 33, 31, 33, 31, 33, 32, 31, 39, 31, 39, 31, 39, 31, 39, + 0, 57, 0, 65, 0, 63, 0, 61, 45, 35, 54, 37, 56, 38, 56, 41, + 30, 35, 33, 33, 34, 30, 34, 30, 40, 44, 24, 44, 29, 39, 32, 39, + 0, 58, 0, 54, 0, 51, 0, 48, 58, 42, 62, 41, 65, 42, 63, 43, + 33, 31, 33, 31, 33, 31, 33, 32, 31, 39, 31, 39, 31, 39, 31, 39, + 0, 55, 0, 65, 0, 65, 0, 63, 46, 35, 53, 37, 54, 38, 55, 39, + 30, 36, 32, 36, 33, 31, 33, 30, 38, 47, 26, 40, 30, 38, 32, 38, + 0, 59, 0, 54, 0, 49, 0, 48, 58, 40, 64, 40, 66, 40, 64, 42, + 33, 31, 33, 31, 32, 32, 32, 32, 31, 39, 30, 40, 30, 41, 30, 41, + 0, 54, 0, 64, 0, 65, 0, 63, 46, 35, 52, 36, 53, 37, 55, 38, + 30, 34, 32, 34, 33, 32, 33, 31, 39, 49, 34, 35, 32, 37, 31, 39, + 0, 59, 0, 54, 0, 49, 0, 47, 60, 38, 64, 38, 66, 39, 64, 42, + 33, 31, 33, 32, 33, 32, 32, 33, 31, 40, 30, 40, 29, 41, 29, 42, + 0, 51, 0, 61, 0, 63, 0, 62, 46, 35, 51, 36, 53, 37, 55, 37, + 31, 33, 32, 33, 32, 32, 33, 32, 37, 54, 38, 36, 34, 37, 32, 39, + 0, 58, 0, 53, 0, 49, 0, 46, 59, 37, 63, 38, 64, 40, 62, 42, + 33, 32, 33, 32, 33, 33, 33, 33, 31, 40, 31, 40, 30, 41, 30, 42, + 0, 39, 0, 60, 0, 73, 0, 60, 34, 33, 38, 32, 49, 31, 73, 30, // mode 1, offset 512 + 58, 44, 40, 51, 39, 48, 39, 46, 31, 32, 30, 31, 32, 31, 33, 32, + 0, 43, 0, 35, 0, 33, 0, 31, 87, 35, 78, 54, 47, 86, 17, 114, + 38, 45, 36, 45, 35, 44, 34, 44, 33, 32, 33, 32, 33, 32, 34, 33, + 0, 43, 0, 53, 0, 52, 0, 46, 37, 32, 50, 30, 66, 30, 78, 35, + 53, 70, 42, 72, 39, 70, 37, 68, 30, 31, 31, 30, 32, 30, 34, 30, + 0, 43, 0, 40, 0, 33, 0, 26, 75, 48, 
62, 68, 37, 97, 14, 122, + 37, 66, 35, 65, 33, 62, 32, 59, 34, 30, 35, 30, 37, 31, 38, 33, + 0, 40, 0, 45, 0, 41, 0, 37, 39, 33, 54, 32, 70, 35, 73, 44, + 34, 87, 34, 84, 33, 83, 32, 82, 37, 30, 41, 29, 40, 29, 40, 30, + 0, 37, 0, 35, 0, 28, 0, 19, 65, 60, 48, 82, 27, 108, 11, 127, + 31, 81, 30, 79, 28, 76, 27, 70, 41, 29, 43, 29, 45, 30, 46, 32, + 0, 38, 0, 39, 0, 33, 0, 30, 40, 34, 54, 35, 65, 41, 65, 53, + 27, 73, 30, 73, 29, 75, 27, 76, 62, 28, 62, 28, 59, 28, 58, 29, + 0, 29, 0, 27, 0, 19, 0, 9, 53, 72, 35, 95, 19, 117, 16, 127, + 26, 77, 24, 77, 23, 74, 23, 68, 58, 29, 60, 28, 61, 30, 60, 34, + 0, 35, 0, 33, 0, 28, 0, 24, 40, 35, 51, 39, 57, 49, 52, 65, + 29, 44, 29, 49, 28, 53, 26, 56, 89, 30, 86, 30, 83, 30, 82, 30, + 0, 22, 0, 18, 0, 10, 0, 0, 39, 86, 22, 108, 13, 125, 19, 127, + 24, 58, 23, 59, 22, 58, 22, 56, 82, 30, 82, 31, 80, 33, 74, 40, + 0, 33, 0, 29, 0, 24, 0, 19, 40, 36, 46, 44, 45, 58, 37, 78, + 31, 28, 29, 31, 28, 34, 26, 37, 90, 45, 92, 43, 91, 43, 91, 43, + 0, 15, 0, 11, 0, 2, 0, 0, 22, 99, 11, 118, 11, 127, 17, 127, + 25, 38, 24, 39, 23, 41, 23, 43, 91, 42, 90, 44, 85, 48, 75, 55, + 0, 31, 0, 27, 0, 22, 0, 15, 37, 39, 37, 52, 30, 70, 19, 91, + 30, 28, 28, 30, 27, 32, 26, 33, 54, 82, 58, 79, 58, 79, 58, 79, + 0, 10, 0, 5, 0, 0, 0, 0, 8, 111, 2, 125, 9, 127, 13, 127, + 25, 34, 25, 35, 25, 36, 25, 39, 58, 79, 57, 80, 53, 84, 47, 88, + 0, 28, 0, 24, 0, 19, 0, 13, 29, 46, 24, 62, 14, 81, 4, 101, + 28, 39, 27, 41, 25, 43, 24, 44, 2, 123, 1, 125, 0, 126, 0, 127, + 0, 6, 0, 0, 0, 0, 0, 0, 0, 116, 0, 126, 4, 127, 9, 127, + 23, 45, 23, 45, 25, 44, 25, 44, 0, 127, 1, 127, 2, 127, 3, 127, + 0, 30, 0, 63, 0, 98, 0, 75, 32, 32, 26, 34, 26, 34, 61, 30, // mode 2, offset 1024 + 42, 34, 16, 38, 25, 34, 31, 32, 32, 32, 32, 32, 33, 32, 33, 32, + 0, 36, 0, 26, 0, 30, 0, 32, 94, 32, 76, 58, 39, 91, 23, 105, + 30, 33, 30, 33, 31, 32, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, + 0, 34, 0, 66, 0, 97, 0, 71, 30, 33, 24, 34, 28, 34, 65, 30, + 31, 52, 11, 41, 24, 34, 30, 32, 29, 32, 33, 32, 33, 32, 33, 32, + 0, 34, 0, 26, 0, 30, 0, 32, 92, 35, 70, 64, 37, 94, 23, 105, + 30, 33, 29, 34, 30, 33, 31, 33, 32, 32, 32, 32, 32, 31, 33, 31, + 0, 37, 0, 71, 0, 98, 0, 66, 29, 33, 22, 35, 29, 34, 70, 30, + 8, 79, 5, 50, 23, 34, 31, 31, 27, 32, 32, 32, 34, 32, 33, 32, + 0, 31, 0, 26, 0, 30, 0, 31, 92, 38, 66, 68, 34, 97, 22, 106, + 30, 33, 29, 34, 30, 34, 30, 34, 32, 32, 32, 31, 33, 31, 33, 31, + 0, 40, 0, 76, 0, 97, 0, 61, 28, 34, 21, 35, 32, 34, 75, 29, + 0, 76, 0, 55, 21, 37, 30, 32, 46, 28, 35, 32, 33, 33, 32, 32, + 0, 29, 0, 26, 0, 29, 0, 31, 92, 40, 62, 73, 32, 99, 22, 107, + 29, 33, 29, 34, 30, 34, 30, 34, 32, 32, 32, 31, 33, 30, 33, 31, + 0, 42, 0, 80, 0, 94, 0, 55, 27, 34, 20, 35, 36, 32, 80, 29, + 1, 48, 0, 48, 17, 40, 27, 35, 79, 25, 47, 31, 33, 33, 31, 32, + 0, 27, 0, 26, 0, 29, 0, 31, 90, 43, 58, 76, 30, 101, 21, 108, + 28, 34, 29, 33, 29, 34, 29, 35, 32, 31, 33, 30, 34, 30, 34, 30, + 0, 44, 0, 81, 0, 90, 0, 51, 26, 34, 21, 35, 41, 31, 82, 29, + 6, 30, 0, 41, 14, 41, 24, 37, 80, 40, 52, 35, 35, 33, 32, 32, + 0, 27, 0, 26, 0, 29, 0, 31, 87, 47, 54, 79, 29, 102, 21, 108, + 27, 35, 29, 34, 28, 34, 28, 35, 32, 31, 33, 30, 33, 30, 33, 31, + 0, 47, 0, 80, 0, 84, 0, 49, 26, 34, 24, 34, 45, 31, 81, 31, + 7, 34, 0, 41, 12, 40, 22, 37, 44, 75, 41, 50, 36, 36, 33, 32, + 0, 28, 0, 28, 0, 29, 0, 31, 81, 51, 51, 81, 30, 101, 22, 107, + 26, 35, 28, 34, 28, 35, 28, 35, 33, 31, 33, 30, 33, 31, 33, 32, + 0, 48, 0, 75, 0, 77, 0, 49, 27, 34, 27, 34, 47, 33, 75, 36, + 10, 40, 3, 42, 12, 40, 21, 37, 16, 97, 26, 66, 
32, 43, 33, 35, + 0, 32, 0, 30, 0, 30, 0, 31, 72, 55, 49, 81, 32, 98, 24, 104, + 25, 36, 27, 35, 28, 35, 28, 35, 33, 32, 33, 31, 32, 32, 32, 33, + 0, 36, 0, 74, 0, 92, 0, 53, 29, 33, 20, 35, 35, 32, 80, 26, // mode 3, offset 1536 + 43, 47, 19, 47, 29, 31, 33, 28, 29, 31, 34, 32, 40, 34, 36, 37, + 0, 24, 0, 25, 0, 32, 0, 34, 91, 41, 57, 74, 28, 99, 20, 105, + 31, 31, 31, 32, 32, 32, 33, 32, 31, 38, 30, 37, 29, 36, 30, 35, + 0, 50, 0, 75, 0, 64, 0, 31, 26, 34, 28, 33, 58, 29, 85, 37, + 33, 74, 23, 46, 30, 26, 31, 27, 30, 31, 47, 33, 46, 40, 33, 44, + 0, 22, 0, 29, 0, 33, 0, 34, 67, 64, 35, 93, 20, 105, 19, 106, + 30, 31, 31, 32, 32, 33, 33, 32, 28, 42, 27, 40, 27, 37, 29, 36, + 0, 51, 0, 61, 0, 40, 0, 22, 29, 33, 42, 31, 70, 34, 72, 54, + 25, 72, 30, 31, 32, 24, 30, 31, 51, 30, 60, 39, 41, 50, 27, 50, + 0, 25, 0, 32, 0, 34, 0, 34, 44, 83, 23, 102, 18, 107, 19, 105, + 30, 33, 32, 33, 32, 33, 33, 32, 25, 44, 26, 40, 28, 37, 30, 35, + 0, 45, 0, 43, 0, 27, 0, 22, 35, 32, 53, 33, 67, 45, 53, 72, + 30, 39, 35, 24, 32, 29, 30, 33, 79, 33, 53, 55, 27, 61, 22, 52, + 0, 28, 0, 32, 0, 34, 0, 34, 31, 95, 20, 105, 18, 107, 20, 105, + 31, 33, 32, 33, 32, 32, 33, 31, 25, 43, 27, 38, 29, 36, 31, 35, + 0, 38, 0, 31, 0, 22, 0, 25, 40, 32, 55, 39, 57, 60, 39, 86, + 35, 23, 34, 29, 31, 35, 31, 35, 72, 54, 32, 73, 18, 64, 22, 49, + 0, 30, 0, 33, 0, 34, 0, 34, 24, 101, 19, 106, 18, 107, 20, 104, + 32, 33, 32, 32, 33, 31, 33, 31, 27, 40, 30, 36, 31, 35, 32, 34, + 0, 33, 0, 26, 0, 23, 0, 27, 42, 35, 51, 50, 46, 74, 32, 93, + 34, 28, 33, 34, 31, 35, 32, 34, 39, 82, 18, 80, 20, 59, 26, 44, + 0, 31, 0, 33, 0, 34, 0, 35, 22, 103, 19, 106, 19, 106, 21, 103, + 32, 32, 33, 31, 33, 31, 34, 31, 30, 37, 31, 35, 32, 34, 32, 34, + 0, 29, 0, 24, 0, 24, 0, 28, 41, 41, 44, 62, 37, 83, 28, 97, + 33, 34, 34, 35, 34, 33, 33, 32, 20, 92, 18, 73, 25, 52, 30, 40, + 0, 32, 0, 34, 0, 35, 0, 35, 23, 103, 20, 105, 20, 104, 22, 102, + 33, 31, 34, 30, 34, 30, 34, 30, 32, 36, 33, 34, 33, 33, 33, 34, + 0, 27, 0, 26, 0, 27, 0, 30, 38, 51, 37, 71, 33, 87, 28, 96, + 34, 34, 35, 34, 35, 32, 34, 31, 20, 86, 24, 64, 30, 47, 32, 39, + 0, 32, 0, 34, 0, 35, 0, 34, 24, 100, 23, 101, 23, 101, 24, 99, + 35, 30, 34, 30, 34, 30, 35, 30, 32, 36, 33, 34, 32, 34, 33, 34, + 0, 39, 0, 72, 0, 100, 0, 75, 30, 31, 21, 32, 23, 32, 63, 24, // mode 4, offset 2048 + 67, 33, 43, 39, 35, 39, 32, 38, 34, 31, 33, 31, 34, 31, 34, 32, + 0, 32, 0, 22, 0, 31, 0, 35, 98, 26, 77, 55, 37, 90, 22, 100, + 29, 37, 29, 36, 31, 35, 33, 33, 35, 32, 35, 31, 35, 32, 36, 33, + 0, 47, 0, 71, 0, 86, 0, 65, 29, 32, 24, 32, 31, 30, 63, 25, + 74, 54, 60, 50, 46, 48, 34, 46, 32, 31, 36, 30, 37, 30, 39, 30, + 0, 33, 0, 26, 0, 33, 0, 37, 85, 32, 64, 60, 33, 87, 23, 93, + 28, 43, 27, 39, 29, 35, 32, 33, 40, 30, 41, 30, 41, 31, 41, 32, + 0, 41, 0, 55, 0, 62, 0, 53, 32, 32, 31, 32, 37, 31, 55, 31, + 45, 84, 50, 70, 45, 61, 36, 55, 32, 32, 40, 30, 45, 29, 48, 29, + 0, 38, 0, 34, 0, 38, 0, 40, 63, 40, 49, 60, 30, 78, 24, 83, + 29, 48, 27, 43, 28, 38, 30, 36, 50, 28, 51, 29, 50, 31, 48, 33, + 0, 35, 0, 39, 0, 41, 0, 41, 33, 33, 35, 33, 39, 34, 43, 37, + 29, 75, 34, 68, 36, 61, 33, 54, 58, 29, 59, 29, 62, 29, 64, 28, + 0, 41, 0, 42, 0, 42, 0, 42, 43, 45, 36, 56, 30, 65, 28, 68, + 30, 48, 27, 44, 27, 41, 28, 37, 65, 29, 63, 30, 60, 33, 56, 36, + 0, 33, 0, 31, 0, 31, 0, 35, 34, 33, 36, 34, 37, 35, 35, 39, + 31, 42, 31, 44, 32, 43, 32, 40, 88, 30, 84, 31, 83, 31, 82, 31, + 0, 40, 0, 44, 0, 44, 0, 43, 32, 44, 30, 48, 30, 52, 30, 55, + 31, 38, 30, 37, 28, 37, 29, 35, 81, 31, 78, 33, 72, 36, 66, 40, + 0, 32, 
0, 30, 0, 30, 0, 33, 33, 33, 34, 34, 34, 36, 32, 38, + 34, 25, 33, 25, 34, 25, 34, 25, 85, 48, 88, 44, 90, 41, 90, 40, + 0, 38, 0, 42, 0, 43, 0, 42, 29, 41, 29, 41, 30, 42, 31, 45, + 34, 26, 33, 27, 31, 28, 31, 30, 88, 40, 85, 41, 80, 43, 72, 47, + 0, 32, 0, 31, 0, 32, 0, 34, 33, 33, 32, 34, 32, 35, 31, 36, + 33, 26, 35, 20, 36, 17, 36, 17, 54, 79, 68, 68, 76, 62, 79, 59, + 0, 37, 0, 39, 0, 41, 0, 40, 29, 37, 29, 37, 30, 37, 31, 40, + 36, 18, 35, 20, 34, 22, 32, 26, 78, 58, 77, 58, 74, 58, 68, 59, + 0, 33, 0, 34, 0, 34, 0, 35, 31, 34, 30, 34, 31, 34, 31, 34, + 33, 29, 35, 23, 36, 20, 36, 18, 31, 98, 45, 88, 54, 82, 59, 78, + 0, 36, 0, 38, 0, 39, 0, 39, 31, 34, 30, 34, 31, 35, 31, 37, + 37, 19, 36, 20, 35, 22, 34, 24, 60, 76, 61, 74, 60, 73, 59, 71, + 0, 30, 0, 47, 0, 81, 0, 85, 33, 32, 30, 31, 28, 32, 46, 29, // mode 5, offset 2560 + 55, 32, 29, 36, 28, 34, 32, 32, 32, 32, 32, 32, 32, 32, 33, 32, + 0, 54, 0, 30, 0, 30, 0, 37, 82, 26, 90, 38, 56, 73, 21, 102, + 32, 32, 31, 32, 31, 33, 32, 32, 33, 32, 33, 32, 32, 32, 32, 32, + 0, 33, 0, 38, 0, 63, 0, 82, 32, 31, 32, 31, 30, 31, 37, 30, + 68, 39, 43, 34, 29, 34, 29, 33, 31, 31, 33, 31, 32, 32, 32, 32, + 0, 71, 0, 44, 0, 33, 0, 37, 63, 27, 86, 30, 72, 55, 37, 86, + 31, 32, 30, 33, 30, 32, 31, 32, 33, 32, 33, 32, 32, 31, 33, 31, + 0, 34, 0, 36, 0, 51, 0, 75, 33, 32, 33, 31, 30, 31, 31, 31, + 60, 61, 56, 38, 38, 33, 30, 33, 29, 32, 32, 31, 33, 32, 33, 32, + 0, 80, 0, 60, 0, 41, 0, 38, 47, 29, 73, 27, 78, 41, 53, 68, + 30, 32, 30, 33, 30, 33, 30, 32, 33, 31, 33, 31, 32, 31, 33, 31, + 0, 33, 0, 35, 0, 43, 0, 64, 33, 32, 33, 31, 32, 31, 30, 31, + 43, 77, 55, 54, 46, 39, 35, 34, 35, 30, 29, 32, 31, 32, 33, 32, + 0, 79, 0, 73, 0, 54, 0, 43, 37, 30, 57, 28, 73, 33, 64, 52, + 31, 32, 30, 32, 30, 32, 30, 32, 33, 31, 33, 31, 33, 31, 33, 31, + 0, 33, 0, 34, 0, 38, 0, 54, 33, 32, 33, 31, 33, 31, 31, 31, + 34, 68, 45, 70, 48, 52, 40, 39, 58, 28, 33, 31, 29, 32, 31, 32, + 0, 73, 0, 77, 0, 65, 0, 51, 32, 31, 45, 29, 63, 30, 66, 42, + 34, 34, 31, 32, 31, 31, 30, 32, 33, 31, 32, 32, 33, 31, 33, 31, + 0, 33, 0, 34, 0, 36, 0, 47, 32, 32, 33, 31, 33, 30, 31, 31, + 34, 44, 38, 66, 44, 62, 43, 48, 81, 31, 52, 28, 34, 31, 30, 32, + 0, 64, 0, 75, 0, 71, 0, 59, 31, 31, 38, 30, 53, 30, 61, 37, + 38, 38, 33, 34, 31, 32, 30, 32, 32, 32, 32, 32, 33, 32, 33, 32, + 0, 33, 0, 34, 0, 36, 0, 43, 32, 31, 33, 31, 33, 31, 32, 31, + 35, 31, 37, 49, 41, 60, 43, 54, 71, 54, 70, 33, 48, 30, 35, 31, + 0, 56, 0, 68, 0, 70, 0, 63, 31, 31, 35, 30, 45, 30, 55, 35, + 40, 44, 36, 37, 33, 34, 31, 33, 32, 32, 32, 32, 33, 32, 33, 32, + 0, 33, 0, 34, 0, 36, 0, 41, 32, 31, 32, 31, 33, 31, 33, 31, + 33, 34, 36, 38, 39, 50, 41, 53, 36, 87, 62, 52, 57, 36, 43, 33, + 0, 50, 0, 59, 0, 65, 0, 62, 33, 31, 35, 31, 42, 31, 49, 35, + 41, 48, 37, 41, 35, 36, 33, 34, 36, 32, 34, 32, 33, 32, 34, 33, +}; \ No newline at end of file diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index ccddf17a..3b9f1d43 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -2608,6 +2608,7 @@ static void mts_dct_generic( if (height == 1) { dct_hor(input, output, shift_1st, height, 0, skip_width); } else if (width == 1) { + // The shift value is taken from VTM, it's a special case for width == 1 dct_ver(input, output, log2_height_minus1 + 1 + bitdepth + 6 - 15, width, 0, skip_height); } else { dct_hor(input, tmp, shift_1st, height, 0, skip_width); diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index 
398388fc..54a67ef6 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -36,6 +36,7 @@ #include "cu.h" #include "intra.h" +#include "strategies/generic/mip_data_generic.h" #include "uvg266.h" #include "strategyselector.h" #include "uvg_math.h" @@ -458,6 +459,295 @@ static void uvg_pdpc_planar_dc_generic( } + +void uvg_mip_boundary_downsampling_1D(int* reduced_dst, const int* const ref_src, int src_len, int dst_len) +{ + if (dst_len < src_len) + { + // Create reduced boundary by downsampling + uint16_t down_smp_factor = src_len / dst_len; + const int log2_factor = uvg_math_floor_log2(down_smp_factor); + const int rounding_offset = (1 << (log2_factor - 1)); + + uint16_t src_idx = 0; + for (uint16_t dst_idx = 0; dst_idx < dst_len; dst_idx++) + { + int sum = 0; + for (int k = 0; k < down_smp_factor; k++) + { + sum += ref_src[src_idx++]; + } + reduced_dst[dst_idx] = (sum + rounding_offset) >> log2_factor; + } + } + else + { + // Copy boundary if no downsampling is needed + for (uint16_t i = 0; i < dst_len; ++i) + { + reduced_dst[i] = ref_src[i]; + } + } +} + + +void uvg_mip_reduced_pred(int* const output, + const int* const input, + const uint8_t* matrix, + const bool transpose, + const int red_bdry_size, + const int red_pred_size, + const int size_id, + const int in_offset, + const int in_offset_tr) +{ + const int input_size = 2 * red_bdry_size; + + // Use local buffer for transposed result + int out_buf_transposed[LCU_WIDTH * LCU_WIDTH]; + int* const out_ptr = transpose ? out_buf_transposed : output; + + int sum = 0; + for (int i = 0; i < input_size; i++) { + sum += input[i]; + } + const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; + assert((input_size == 4 * (input_size >> 2)) && "MIP input size must be divisible by four"); + + const uint8_t* weight = matrix; + const int input_offset = transpose ? 
in_offset_tr : in_offset; + + int pos_res = 0; + for (int y = 0; y < red_pred_size; y++) { + for (int x = 0; x < red_pred_size; x++) { + int tmp0 = input[0] * weight[0]; + int tmp1 = input[1] * weight[1]; + int tmp2 = input[2] * weight[2]; + int tmp3 = input[3] * weight[3]; + for (int i = 4; i < input_size; i += 4) { + tmp0 += input[i] * weight[i]; + tmp1 += input[i + 1] * weight[i + 1]; + tmp2 += input[i + 2] * weight[i + 2]; + tmp3 += input[i + 3] * weight[i + 3]; + } + out_ptr[pos_res] = CLIP_TO_PIXEL(((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) + input_offset); + pos_res++; + weight += input_size; + } + } + + if (transpose) { + for (int y = 0; y < red_pred_size; y++) { + for (int x = 0; x < red_pred_size; x++) { + output[y * red_pred_size + x] = out_ptr[x * red_pred_size + y]; + } + } + } +} + + +void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int* const boundary, + const uint16_t src_size_ups_dim, const uint16_t src_size_orth_dim, + const uint16_t src_step, const uint16_t src_stride, + const uint16_t dst_step, const uint16_t dst_stride, + const uint16_t boundary_step, + const uint16_t ups_factor) +{ + const int log2_factor = uvg_math_floor_log2(ups_factor); + assert(ups_factor >= 2 && "Upsampling factor must be at least 2."); + const int rounding_offset = 1 << (log2_factor - 1); + + uint16_t idx_orth_dim = 0; + const int* src_line = src; + int* dst_line = dst; + const int* boundary_line = boundary + boundary_step - 1; + while (idx_orth_dim < src_size_orth_dim) + { + uint16_t idx_upsample_dim = 0; + const int* before = boundary_line; + const int* behind = src_line; + int* cur_dst = dst_line; + while (idx_upsample_dim < src_size_ups_dim) + { + uint16_t pos = 1; + int scaled_before = (*before) << log2_factor; + int scaled_behind = 0; + while (pos <= ups_factor) + { + scaled_before -= *before; + scaled_behind += *behind; + *cur_dst = (scaled_before + scaled_behind + rounding_offset) >> log2_factor; + + pos++; + cur_dst += dst_step; + } + + idx_upsample_dim++; + before = behind; + behind += src_step; + } + + idx_orth_dim++; + src_line += src_stride; + dst_line += dst_stride; + boundary_line += boundary_step; + } +} + + + +/** \brief Matrix weighted intra prediction. +*/ +static void mip_predict_generic( + const uvg_intra_references* const refs, + const uint16_t pred_block_width, + const uint16_t pred_block_height, + uvg_pixel* dst, + const int mip_mode, + const bool mip_transp) +{ + // MIP prediction uses int values instead of uvg_pixel as some temp values may be negative + + uvg_pixel* out = dst; + int result[32*32] = {0}; + const int mode_idx = mip_mode; + + // *** INPUT PREP *** + + // Initialize prediction parameters START + uint16_t width = pred_block_width; + uint16_t height = pred_block_height; + + int size_id; // Prediction block type + if (width == 4 && height == 4) { + size_id = 0; + } + else if (width == 4 || height == 4 || (width == 8 && height == 8)) { + size_id = 1; + } + else { + size_id = 2; + } + + // Reduced boundary and prediction sizes + int red_bdry_size = (size_id == 0) ? 2 : 4; + int red_pred_size = (size_id < 2) ? 
4 : 8; + + // Upsampling factors + uint16_t ups_hor_factor = width / red_pred_size; + uint16_t ups_ver_factor = height / red_pred_size; + + // Upsampling factors must be powers of two + assert(!((ups_hor_factor < 1) || ((ups_hor_factor & (ups_hor_factor - 1))) != 0) && "Horizontal upsampling factor must be power of two."); + assert(!((ups_ver_factor < 1) || ((ups_ver_factor & (ups_ver_factor - 1))) != 0) && "Vertical upsampling factor must be power of two."); + + // Initialize prediction parameters END + + int ref_samples_top[INTRA_REF_LENGTH]; + int ref_samples_left[INTRA_REF_LENGTH]; + + for (int i = 1; i < INTRA_REF_LENGTH; i++) { + ref_samples_top[i-1] = (int)refs->ref.top[i]; // NOTE: in VTM code these are indexed as x + 1 & y + 1 during init + ref_samples_left[i-1] = (int)refs->ref.left[i]; + } + + // Compute reduced boundary with Haar-downsampling + const int input_size = 2 * red_bdry_size; + + int red_bdry[MIP_MAX_INPUT_SIZE]; + int red_bdry_trans[MIP_MAX_INPUT_SIZE]; + + int* const top_reduced = &red_bdry[0]; + int* const left_reduced = &red_bdry[red_bdry_size]; + + uvg_mip_boundary_downsampling_1D(top_reduced, ref_samples_top, width, red_bdry_size); + uvg_mip_boundary_downsampling_1D(left_reduced, ref_samples_left, height, red_bdry_size); + + // Transposed reduced boundaries + int* const left_reduced_trans = &red_bdry_trans[0]; + int* const top_reduced_trans = &red_bdry_trans[red_bdry_size]; + + for (int x = 0; x < red_bdry_size; x++) { + top_reduced_trans[x] = top_reduced[x]; + } + for (int y = 0; y < red_bdry_size; y++) { + left_reduced_trans[y] = left_reduced[y]; + } + + int input_offset = red_bdry[0]; + int input_offset_trans = red_bdry_trans[0]; + + const bool has_first_col = (size_id < 2); + // First column of matrix not needed for large blocks + red_bdry[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset) : 0; + red_bdry_trans[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset_trans) : 0; + + for (int i = 1; i < input_size; ++i) { + red_bdry[i] -= input_offset; + red_bdry_trans[i] -= input_offset_trans; + } + + // *** INPUT PREP *** END + + // *** BLOCK PREDICT *** + + const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1); + const bool transpose = mip_transp; + + const uint8_t* matrix; + switch (size_id) { + case 0: + matrix = &uvg_mip_matrix_4x4[mode_idx][0][0]; + break; + case 1: + matrix = &uvg_mip_matrix_8x8[mode_idx][0][0]; + break; + case 2: + matrix = &uvg_mip_matrix_16x16[mode_idx][0][0]; + break; + default: + assert(false && "Invalid MIP size id."); + } + + // Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8 + int red_pred_buffer[8*8]; + int* const reduced_pred = need_upsampling ? red_pred_buffer : result; + + const int* const reduced_bdry = transpose ? 
red_bdry_trans : red_bdry; + + uvg_mip_reduced_pred(reduced_pred, reduced_bdry, matrix, transpose, red_bdry_size, red_pred_size, size_id, input_offset, input_offset_trans); + if (need_upsampling) { + const int* ver_src = reduced_pred; + uint16_t ver_src_step = width; + + if (ups_hor_factor > 1) { + int* const hor_dst = result + (ups_ver_factor - 1) * width; + ver_src = hor_dst; + ver_src_step *= ups_ver_factor; + + uvg_mip_pred_upsampling_1D(hor_dst, reduced_pred, ref_samples_left, + red_pred_size, red_pred_size, + 1, red_pred_size, 1, ver_src_step, + ups_ver_factor, ups_hor_factor); + } + + if (ups_ver_factor > 1) { + uvg_mip_pred_upsampling_1D(result, ver_src, ref_samples_top, + red_pred_size, width, + ver_src_step, 1, width, 1, + 1, ups_ver_factor); + } + } + + // Assign and cast values from temp array to output + for (int i = 0; i < 32 * 32; i++) { + out[i] = (uvg_pixel)result[i]; + } + // *** BLOCK PREDICT *** END +} + + + int uvg_strategy_register_intra_generic(void* opaque, uint8_t bitdepth) { bool success = true; @@ -466,6 +756,7 @@ int uvg_strategy_register_intra_generic(void* opaque, uint8_t bitdepth) success &= uvg_strategyselector_register(opaque, "intra_pred_planar", "generic", 0, &uvg_intra_pred_planar_generic); success &= uvg_strategyselector_register(opaque, "intra_pred_filtered_dc", "generic", 0, &uvg_intra_pred_filtered_dc_generic); success &= uvg_strategyselector_register(opaque, "pdpc_planar_dc", "generic", 0, &uvg_pdpc_planar_dc_generic); + success &= uvg_strategyselector_register(opaque, "mip_predict", "generic", 0, &mip_predict_generic); return success; } diff --git a/src/strategies/generic/mip_data_generic.h b/src/strategies/generic/mip_data_generic.h new file mode 100644 index 00000000..957badcc --- /dev/null +++ b/src/strategies/generic/mip_data_generic.h @@ -0,0 +1,885 @@ +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + + /** +* \ingroup Reconstruction +* \file +* MIP weight matrix data. +*/ + +/** \file MipData.h +\brief weight and bias data for matrix-based intra prediction (MIP) +*/ +#pragma once + +#define MIP_SHIFT_MATRIX 6 +#define MIP_OFFSET_MATRIX 32 + +ALIGNED(32) static const uint8_t uvg_mip_matrix_4x4[16][16][4] = { + { + { 32, 30, 90, 28}, + { 32, 32, 72, 28}, + { 34, 77, 53, 30}, + { 51, 124, 36, 37}, + { 31, 31, 95, 37}, + { 33, 31, 70, 50}, + { 52, 80, 25, 60}, + { 78, 107, 1, 65}, + { 31, 29, 37, 95}, + { 38, 34, 19, 101}, + { 73, 85, 0, 81}, + { 92, 99, 0, 65}, + { 34, 29, 14, 111}, + { 48, 48, 7, 100}, + { 80, 91, 0, 74}, + { 89, 97, 0, 64} + }, + { + { 31, 23, 34, 29}, + { 31, 43, 34, 31}, + { 30, 95, 34, 32}, + { 29, 100, 35, 33}, + { 31, 23, 34, 29}, + { 31, 43, 34, 31}, + { 30, 95, 34, 32}, + { 29, 99, 35, 33}, + { 31, 24, 35, 29}, + { 31, 44, 34, 31}, + { 30, 95, 35, 32}, + { 29, 99, 35, 33}, + { 31, 24, 35, 30}, + { 31, 44, 35, 31}, + { 30, 95, 35, 32}, + { 29, 99, 35, 33} + }, + { + { 32, 32, 36, 58}, + { 32, 29, 26, 66}, + { 36, 37, 23, 61}, + { 79, 84, 3, 37}, + { 32, 32, 30, 69}, + { 33, 29, 24, 71}, + { 44, 16, 21, 70}, + { 96, 18, 0, 57}, + { 32, 31, 24, 74}, + { 33, 30, 23, 71}, + { 36, 24, 24, 71}, + { 59, 9, 16, 68}, + { 32, 32, 23, 75}, + { 33, 30, 24, 70}, + { 32, 30, 25, 71}, + { 36, 26, 25, 70} + }, + { + { 32, 33, 34, 32}, + { 32, 30, 22, 38}, + { 29, 46, 25, 38}, + { 53, 123, 28, 22}, + { 32, 33, 30, 37}, + { 32, 30, 21, 38}, + { 32, 40, 24, 38}, + { 64, 116, 26, 17}, + { 32, 32, 23, 49}, + { 32, 30, 21, 39}, + { 34, 39, 24, 37}, + { 72, 109, 23, 16}, + { 33, 31, 17, 60}, + { 32, 31, 21, 39}, + { 35, 41, 24, 37}, + { 72, 106, 22, 18} + }, + { + { 34, 25, 89, 20}, + { 38, 32, 47, 24}, + { 40, 86, 29, 27}, + { 38, 98, 32, 29}, + { 34, 31, 94, 40}, + { 44, 25, 83, 27}, + { 54, 72, 43, 16}, + { 47, 94, 33, 22}, + { 33, 31, 36, 94}, + { 43, 23, 51, 76}, + { 62, 55, 64, 25}, + { 57, 89, 38, 15}, + { 32, 32, 28, 101}, + { 38, 26, 33, 94}, + { 55, 38, 68, 47}, + { 59, 80, 52, 16} + }, + { + { 28, 30, 68, 29}, + { 23, 48, 23, 48}, + { 39, 98, 16, 42}, + { 84, 86, 20, 17}, + { 25, 31, 52, 74}, + { 38, 68, 5, 70}, + { 95, 78, 7, 21}, + { 127, 54, 12, 0}, + { 30, 47, 14, 107}, + { 79, 76, 0, 53}, + { 127, 59, 7, 1}, + { 127, 51, 9, 0}, + { 50, 71, 1, 96}, + { 109, 69, 7, 25}, + { 127, 56, 9, 0}, + { 123, 53, 13, 0} + }, + { + { 40, 20, 72, 18}, + { 48, 29, 44, 18}, + { 53, 81, 35, 18}, + { 48, 96, 33, 22}, + { 45, 23, 79, 49}, + { 61, 21, 56, 49}, + { 72, 52, 32, 48}, + { 65, 69, 20, 50}, + { 41, 27, 29, 96}, + { 49, 22, 28, 94}, + { 52, 22, 28, 93}, + { 49, 27, 27, 92}, + { 37, 29, 26, 98}, + { 39, 28, 28, 97}, + { 38, 28, 30, 97}, + { 38, 29, 30, 95} + }, + { + { 33, 27, 43, 27}, + { 32, 29, 31, 31}, + { 31, 73, 33, 31}, + { 35, 104, 34, 28}, + { 32, 30, 63, 22}, + { 33, 26, 33, 29}, + { 33, 57, 33, 30}, + { 37, 100, 35, 27}, + { 32, 31, 85, 25}, + { 34, 25, 39, 25}, + { 35, 39, 32, 28}, + { 40, 91, 35, 25}, + { 32, 30, 77, 50}, + { 34, 26, 54, 22}, + { 37, 31, 34, 27}, + { 45, 75, 34, 23} + }, + { + { 34, 25, 77, 19}, + { 36, 34, 56, 24}, + { 41, 83, 39, 30}, + { 47, 96, 28, 35}, + { 34, 31, 70, 65}, + { 38, 29, 53, 77}, + { 43, 36, 37, 83}, + { 48, 39, 28, 83}, + { 33, 31, 31, 98}, + { 33, 31, 30, 99}, + { 34, 30, 31, 98}, + { 36, 29, 31, 96}, + { 32, 32, 30, 97}, + { 32, 32, 31, 96}, + { 31, 33, 33, 96}, + { 32, 
33, 34, 94} + }, + { + { 30, 30, 93, 19}, + { 31, 59, 67, 34}, + { 31, 79, 36, 59}, + { 30, 67, 17, 79}, + { 30, 38, 68, 69}, + { 29, 40, 43, 91}, + { 26, 35, 32, 101}, + { 23, 32, 30, 101}, + { 26, 34, 30, 101}, + { 23, 33, 30, 102}, + { 20, 32, 31, 102}, + { 18, 33, 32, 102}, + { 23, 33, 31, 100}, + { 20, 34, 32, 100}, + { 18, 35, 33, 100}, + { 18, 35, 33, 100} + }, + { + { 31, 54, 90, 26}, + { 32, 60, 53, 61}, + { 34, 49, 37, 84}, + { 34, 39, 35, 89}, + { 35, 38, 41, 88}, + { 35, 35, 32, 96}, + { 35, 31, 33, 96}, + { 35, 32, 35, 94}, + { 34, 34, 30, 97}, + { 35, 32, 33, 95}, + { 35, 32, 34, 94}, + { 35, 34, 34, 93}, + { 34, 34, 34, 93}, + { 35, 34, 34, 93}, + { 35, 34, 34, 92}, + { 36, 34, 35, 91} + }, + { + { 32, 29, 54, 24}, + { 31, 32, 34, 29}, + { 31, 43, 34, 29}, + { 32, 67, 36, 28}, + { 31, 34, 69, 37}, + { 31, 35, 46, 33}, + { 30, 35, 39, 33}, + { 30, 42, 39, 36}, + { 31, 35, 39, 88}, + { 30, 38, 41, 84}, + { 30, 39, 40, 81}, + { 39, 46, 38, 78}, + { 31, 36, 34, 96}, + { 34, 38, 37, 93}, + { 55, 42, 38, 82}, + { 89, 53, 38, 65} + }, + { + { 32, 33, 43, 29}, + { 32, 30, 29, 33}, + { 31, 47, 31, 33}, + { 33, 100, 31, 31}, + { 32, 33, 74, 25}, + { 32, 32, 34, 31}, + { 32, 33, 30, 33}, + { 32, 68, 30, 32}, + { 32, 31, 91, 40}, + { 32, 32, 58, 26}, + { 31, 31, 30, 32}, + { 31, 42, 30, 33}, + { 32, 31, 49, 85}, + { 32, 31, 83, 35}, + { 31, 33, 48, 29}, + { 31, 36, 32, 33} + }, + { + { 31, 29, 81, 35}, + { 32, 28, 34, 50}, + { 31, 75, 16, 43}, + { 34, 103, 29, 32}, + { 32, 32, 53, 78}, + { 31, 28, 36, 88}, + { 30, 52, 18, 73}, + { 52, 88, 17, 35}, + { 32, 32, 35, 94}, + { 30, 31, 35, 95}, + { 36, 29, 31, 92}, + { 100, 43, 16, 40}, + { 32, 32, 35, 93}, + { 30, 32, 38, 93}, + { 55, 18, 37, 83}, + { 127, 0, 30, 40} + }, + { + { 31, 22, 47, 30}, + { 31, 48, 25, 34}, + { 30, 95, 31, 32}, + { 32, 103, 33, 32}, + { 30, 24, 57, 31}, + { 30, 47, 26, 34}, + { 31, 95, 31, 32}, + { 43, 97, 35, 25}, + { 29, 26, 44, 63}, + { 37, 38, 24, 47}, + { 74, 63, 28, 20}, + { 110, 58, 34, 3}, + { 46, 22, 5, 108}, + { 93, 5, 9, 77}, + { 127, 0, 17, 52}, + { 127, 0, 15, 50} + }, + { + { 32, 27, 68, 24}, + { 35, 23, 35, 28}, + { 35, 64, 29, 29}, + { 37, 104, 33, 28}, + { 32, 32, 91, 40}, + { 36, 23, 67, 36}, + { 49, 23, 39, 28}, + { 60, 67, 30, 20}, + { 32, 32, 36, 95}, + { 35, 29, 38, 93}, + { 50, 16, 30, 84}, + { 72, 16, 15, 65}, + { 32, 32, 27, 100}, + { 33, 32, 29, 100}, + { 37, 29, 30, 98}, + { 48, 21, 29, 90} + } +}; + +ALIGNED(32) static const uint8_t uvg_mip_matrix_8x8[8][16][8] = +{ + { + { 30, 63, 46, 37, 25, 33, 33, 34}, + { 30, 60, 66, 38, 32, 31, 32, 33}, + { 29, 45, 74, 42, 32, 32, 32, 33}, + { 30, 39, 62, 58, 32, 33, 32, 33}, + { 30, 66, 55, 39, 32, 30, 30, 36}, + { 29, 54, 69, 40, 33, 31, 31, 33}, + { 28, 48, 71, 43, 32, 33, 32, 33}, + { 28, 41, 72, 46, 32, 34, 32, 33}, + { 30, 66, 56, 40, 32, 33, 28, 33}, + { 29, 55, 69, 39, 33, 33, 30, 32}, + { 27, 46, 72, 43, 33, 33, 32, 33}, + { 27, 42, 69, 48, 32, 34, 32, 33}, + { 30, 63, 55, 40, 32, 33, 35, 30}, + { 29, 56, 66, 40, 33, 33, 33, 30}, + { 27, 47, 69, 44, 33, 33, 33, 32}, + { 27, 42, 65, 50, 32, 34, 32, 33} + }, + { + { 32, 33, 30, 31, 74, 30, 31, 32}, + { 33, 56, 28, 30, 41, 29, 32, 32}, + { 33, 77, 52, 26, 29, 34, 30, 32}, + { 33, 37, 80, 41, 31, 34, 30, 32}, + { 32, 32, 33, 31, 59, 76, 28, 31}, + { 33, 31, 31, 30, 78, 40, 28, 32}, + { 33, 47, 28, 29, 53, 27, 31, 31}, + { 33, 61, 44, 28, 34, 32, 31, 31}, + { 32, 31, 34, 30, 26, 64, 76, 27}, + { 32, 31, 34, 29, 45, 86, 36, 29}, + { 33, 27, 34, 29, 73, 55, 25, 32}, + { 33, 33, 34, 30, 62, 33, 
30, 31}, + { 32, 31, 34, 30, 30, 29, 58, 74}, + { 32, 31, 35, 29, 27, 53, 77, 35}, + { 32, 30, 36, 29, 40, 80, 44, 31}, + { 33, 28, 37, 30, 58, 60, 31, 33} + }, + { + { 32, 51, 27, 32, 27, 50, 29, 32}, + { 32, 95, 42, 29, 29, 42, 30, 32}, + { 32, 27, 99, 34, 31, 41, 29, 32}, + { 32, 34, 21, 104, 31, 42, 30, 32}, + { 32, 45, 30, 32, 9, 88, 40, 30}, + { 32, 77, 38, 30, 9, 76, 38, 30}, + { 32, 38, 78, 33, 14, 67, 37, 30}, + { 32, 30, 30, 87, 20, 59, 38, 31}, + { 33, 37, 32, 32, 27, 18, 106, 34}, + { 34, 44, 34, 31, 25, 17, 108, 31}, + { 36, 39, 45, 31, 24, 15, 108, 30}, + { 37, 31, 31, 54, 25, 14, 101, 32}, + { 36, 33, 32, 30, 29, 37, 13, 110}, + { 39, 32, 32, 29, 27, 37, 15, 108}, + { 44, 33, 31, 27, 25, 37, 16, 106}, + { 47, 30, 31, 32, 25, 34, 19, 102} + }, + { + { 32, 48, 35, 35, 47, 68, 31, 31}, + { 32, 33, 59, 40, 27, 71, 33, 30}, + { 32, 29, 47, 65, 24, 62, 37, 30}, + { 33, 33, 31, 81, 26, 50, 42, 32}, + { 32, 30, 40, 38, 30, 70, 55, 31}, + { 32, 20, 46, 50, 26, 55, 64, 31}, + { 33, 30, 29, 66, 25, 41, 72, 33}, + { 36, 34, 27, 69, 26, 31, 67, 39}, + { 33, 28, 36, 40, 30, 26, 85, 47}, + { 36, 27, 33, 50, 31, 20, 79, 53}, + { 43, 30, 26, 57, 28, 17, 67, 62}, + { 51, 27, 28, 55, 22, 23, 49, 70}, + { 38, 29, 32, 39, 28, 30, 22, 104}, + { 51, 31, 28, 43, 24, 31, 17, 102}, + { 69, 23, 30, 40, 15, 38, 10, 95}, + { 77, 13, 35, 38, 8, 43, 8, 90} + }, + { + { 32, 38, 32, 33, 101, 40, 29, 32}, + { 32, 40, 37, 32, 100, 36, 30, 32}, + { 32, 37, 46, 35, 94, 33, 30, 31}, + { 33, 34, 30, 62, 81, 35, 30, 31}, + { 32, 32, 33, 32, 22, 102, 39, 29}, + { 32, 31, 33, 33, 26, 104, 34, 28}, + { 33, 33, 33, 33, 31, 103, 32, 28}, + { 33, 32, 34, 36, 37, 94, 33, 28}, + { 32, 33, 32, 32, 34, 24, 99, 36}, + { 32, 34, 33, 33, 33, 30, 98, 32}, + { 33, 33, 34, 33, 31, 37, 95, 29}, + { 33, 33, 33, 36, 30, 46, 85, 31}, + { 32, 33, 32, 33, 30, 34, 23, 104}, + { 32, 34, 33, 33, 31, 32, 30, 98}, + { 32, 33, 34, 34, 31, 29, 39, 91}, + { 33, 33, 32, 37, 32, 30, 47, 82} + }, + { + { 32, 52, 48, 31, 38, 76, 26, 32}, + { 33, 19, 62, 50, 25, 50, 51, 31}, + { 33, 30, 20, 74, 29, 29, 54, 51}, + { 34, 35, 23, 56, 31, 25, 41, 76}, + { 33, 25, 38, 39, 28, 39, 83, 35}, + { 35, 28, 25, 47, 31, 23, 57, 74}, + { 37, 35, 22, 38, 31, 27, 30, 101}, + { 38, 32, 33, 29, 30, 31, 27, 103}, + { 34, 32, 27, 37, 32, 25, 41, 92}, + { 38, 33, 28, 32, 30, 31, 18, 111}, + { 40, 32, 33, 27, 29, 33, 18, 111}, + { 40, 32, 34, 27, 28, 33, 23, 105}, + { 35, 32, 30, 33, 31, 33, 20, 107}, + { 38, 31, 33, 30, 29, 33, 21, 106}, + { 40, 32, 33, 29, 29, 34, 22, 105}, + { 40, 32, 33, 30, 29, 34, 24, 101} + }, + { + { 32, 28, 31, 33, 92, 33, 30, 31}, + { 33, 30, 28, 33, 71, 26, 32, 30}, + { 33, 60, 26, 33, 47, 28, 33, 30}, + { 33, 63, 44, 36, 37, 31, 33, 30}, + { 33, 30, 31, 33, 43, 90, 33, 29}, + { 33, 28, 29, 34, 71, 71, 26, 30}, + { 33, 30, 26, 33, 86, 45, 28, 30}, + { 33, 38, 29, 32, 74, 32, 33, 29}, + { 33, 32, 30, 32, 29, 41, 95, 27}, + { 34, 31, 29, 33, 26, 71, 73, 22}, + { 34, 31, 29, 33, 37, 88, 46, 25}, + { 33, 32, 28, 34, 55, 75, 36, 28}, + { 34, 31, 30, 32, 33, 27, 43, 89}, + { 35, 32, 28, 33, 33, 23, 77, 59}, + { 34, 33, 28, 33, 30, 35, 91, 37}, + { 34, 34, 28, 34, 33, 53, 74, 31} + }, + { + { 33, 49, 26, 32, 26, 52, 28, 31}, + { 33, 71, 72, 24, 30, 32, 34, 31}, + { 32, 23, 70, 68, 32, 32, 32, 32}, + { 31, 33, 21, 106, 33, 32, 32, 33}, + { 34, 47, 32, 29, 5, 86, 44, 26}, + { 34, 44, 89, 28, 28, 37, 33, 30}, + { 32, 27, 46, 89, 33, 31, 31, 32}, + { 30, 33, 20, 107, 33, 33, 32, 33}, + { 35, 39, 42, 27, 26, 24, 92, 35}, + { 34, 27, 87, 43, 30, 34, 38, 
31}, + { 31, 31, 32, 100, 32, 33, 30, 32}, + { 29, 32, 22, 106, 33, 33, 32, 33}, + { 35, 29, 47, 32, 32, 32, 17, 100}, + { 34, 24, 69, 60, 34, 33, 28, 44}, + { 31, 33, 31, 99, 32, 33, 32, 31}, + { 29, 33, 25, 103, 33, 33, 32, 35} + } +}; + +ALIGNED(32) static const uint8_t uvg_mip_matrix_16x16[6][64][8] = +{ + { + { 0, 42, 37, 33, 27, 44, 33, 35}, + { 0, 71, 39, 34, 24, 36, 35, 36}, + { 0, 77, 46, 35, 33, 30, 34, 36}, + { 0, 64, 60, 35, 33, 31, 32, 36}, + { 0, 49, 71, 38, 32, 32, 31, 36}, + { 0, 42, 66, 50, 33, 31, 32, 36}, + { 0, 40, 52, 67, 33, 31, 32, 35}, + { 0, 38, 43, 75, 33, 32, 32, 35}, + { 0, 56, 40, 33, 26, 43, 38, 36}, + { 0, 70, 49, 34, 30, 28, 38, 38}, + { 0, 65, 57, 36, 34, 28, 33, 39}, + { 0, 59, 60, 39, 33, 30, 31, 38}, + { 0, 55, 60, 43, 33, 30, 31, 38}, + { 0, 51, 61, 47, 33, 30, 32, 37}, + { 0, 46, 62, 51, 34, 30, 32, 37}, + { 0, 42, 60, 55, 33, 31, 32, 37}, + { 0, 60, 42, 34, 30, 37, 43, 38}, + { 0, 68, 52, 35, 35, 22, 37, 40}, + { 0, 62, 58, 37, 34, 28, 31, 40}, + { 0, 58, 59, 41, 33, 30, 30, 39}, + { 0, 56, 59, 44, 34, 30, 31, 38}, + { 0, 53, 60, 45, 33, 30, 31, 38}, + { 0, 49, 65, 45, 33, 30, 31, 38}, + { 0, 45, 64, 47, 33, 31, 32, 38}, + { 0, 59, 44, 35, 31, 34, 43, 41}, + { 0, 66, 53, 36, 35, 25, 31, 43}, + { 0, 61, 58, 38, 34, 29, 30, 40}, + { 0, 59, 57, 41, 33, 30, 31, 39}, + { 0, 57, 58, 43, 33, 30, 31, 39}, + { 0, 54, 61, 43, 33, 31, 31, 39}, + { 0, 51, 64, 43, 33, 31, 31, 39}, + { 0, 48, 64, 45, 33, 32, 31, 39}, + { 0, 57, 45, 35, 30, 35, 40, 44}, + { 0, 65, 54, 37, 33, 33, 24, 44}, + { 0, 63, 56, 38, 34, 30, 29, 39}, + { 0, 61, 56, 41, 34, 30, 32, 39}, + { 0, 58, 58, 42, 33, 31, 31, 39}, + { 0, 54, 62, 41, 33, 31, 31, 39}, + { 0, 51, 65, 42, 33, 31, 31, 39}, + { 0, 48, 63, 43, 33, 32, 31, 39}, + { 0, 55, 46, 35, 30, 36, 38, 47}, + { 0, 65, 53, 37, 32, 36, 26, 40}, + { 0, 65, 54, 38, 33, 31, 30, 38}, + { 0, 63, 55, 39, 33, 30, 32, 38}, + { 0, 59, 58, 40, 33, 31, 31, 39}, + { 0, 54, 64, 40, 33, 31, 30, 40}, + { 0, 49, 66, 40, 32, 32, 30, 41}, + { 0, 48, 64, 42, 32, 32, 30, 41}, + { 0, 54, 46, 35, 30, 34, 39, 49}, + { 0, 64, 52, 36, 32, 34, 34, 35}, + { 0, 65, 53, 37, 33, 32, 32, 37}, + { 0, 63, 55, 38, 33, 31, 31, 39}, + { 0, 59, 60, 38, 33, 31, 31, 40}, + { 0, 54, 64, 38, 33, 32, 30, 40}, + { 0, 49, 66, 39, 33, 32, 29, 41}, + { 0, 47, 64, 42, 32, 33, 29, 42}, + { 0, 51, 46, 35, 31, 33, 37, 54}, + { 0, 61, 51, 36, 32, 33, 38, 36}, + { 0, 63, 53, 37, 32, 32, 34, 37}, + { 0, 62, 55, 37, 33, 32, 32, 39}, + { 0, 58, 59, 37, 33, 32, 31, 40}, + { 0, 53, 63, 38, 33, 32, 31, 40}, + { 0, 49, 64, 40, 33, 33, 30, 41}, + { 0, 46, 62, 42, 33, 33, 30, 42} + }, + { + { 0, 39, 34, 33, 58, 44, 31, 32}, + { 0, 60, 38, 32, 40, 51, 30, 31}, + { 0, 73, 49, 31, 39, 48, 32, 31}, + { 0, 60, 73, 30, 39, 46, 33, 32}, + { 0, 43, 87, 35, 38, 45, 33, 32}, + { 0, 35, 78, 54, 36, 45, 33, 32}, + { 0, 33, 47, 86, 35, 44, 33, 32}, + { 0, 31, 17, 114, 34, 44, 34, 33}, + { 0, 43, 37, 32, 53, 70, 30, 31}, + { 0, 53, 50, 30, 42, 72, 31, 30}, + { 0, 52, 66, 30, 39, 70, 32, 30}, + { 0, 46, 78, 35, 37, 68, 34, 30}, + { 0, 43, 75, 48, 37, 66, 34, 30}, + { 0, 40, 62, 68, 35, 65, 35, 30}, + { 0, 33, 37, 97, 33, 62, 37, 31}, + { 0, 26, 14, 122, 32, 59, 38, 33}, + { 0, 40, 39, 33, 34, 87, 37, 30}, + { 0, 45, 54, 32, 34, 84, 41, 29}, + { 0, 41, 70, 35, 33, 83, 40, 29}, + { 0, 37, 73, 44, 32, 82, 40, 30}, + { 0, 37, 65, 60, 31, 81, 41, 29}, + { 0, 35, 48, 82, 30, 79, 43, 29}, + { 0, 28, 27, 108, 28, 76, 45, 30}, + { 0, 19, 11, 127, 27, 70, 46, 32}, + { 0, 38, 40, 34, 27, 73, 62, 28}, + { 0, 39, 54, 35, 30, 
73, 62, 28}, + { 0, 33, 65, 41, 29, 75, 59, 28}, + { 0, 30, 65, 53, 27, 76, 58, 29}, + { 0, 29, 53, 72, 26, 77, 58, 29}, + { 0, 27, 35, 95, 24, 77, 60, 28}, + { 0, 19, 19, 117, 23, 74, 61, 30}, + { 0, 9, 16, 127, 23, 68, 60, 34}, + { 0, 35, 40, 35, 29, 44, 89, 30}, + { 0, 33, 51, 39, 29, 49, 86, 30}, + { 0, 28, 57, 49, 28, 53, 83, 30}, + { 0, 24, 52, 65, 26, 56, 82, 30}, + { 0, 22, 39, 86, 24, 58, 82, 30}, + { 0, 18, 22, 108, 23, 59, 82, 31}, + { 0, 10, 13, 125, 22, 58, 80, 33}, + { 0, 0, 19, 127, 22, 56, 74, 40}, + { 0, 33, 40, 36, 31, 28, 90, 45}, + { 0, 29, 46, 44, 29, 31, 92, 43}, + { 0, 24, 45, 58, 28, 34, 91, 43}, + { 0, 19, 37, 78, 26, 37, 91, 43}, + { 0, 15, 22, 99, 25, 38, 91, 42}, + { 0, 11, 11, 118, 24, 39, 90, 44}, + { 0, 2, 11, 127, 23, 41, 85, 48}, + { 0, 0, 17, 127, 23, 43, 75, 55}, + { 0, 31, 37, 39, 30, 28, 54, 82}, + { 0, 27, 37, 52, 28, 30, 58, 79}, + { 0, 22, 30, 70, 27, 32, 58, 79}, + { 0, 15, 19, 91, 26, 33, 58, 79}, + { 0, 10, 8, 111, 25, 34, 58, 79}, + { 0, 5, 2, 125, 25, 35, 57, 80}, + { 0, 0, 9, 127, 25, 36, 53, 84}, + { 0, 0, 13, 127, 25, 39, 47, 88}, + { 0, 28, 29, 46, 28, 39, 2, 123}, + { 0, 24, 24, 62, 27, 41, 1, 125}, + { 0, 19, 14, 81, 25, 43, 0, 126}, + { 0, 13, 4, 101, 24, 44, 0, 127}, + { 0, 6, 0, 116, 23, 45, 0, 127}, + { 0, 0, 0, 126, 23, 45, 1, 127}, + { 0, 0, 4, 127, 25, 44, 2, 127}, + { 0, 0, 9, 127, 25, 44, 3, 127} + }, + { + { 0, 30, 32, 32, 42, 34, 32, 32}, + { 0, 63, 26, 34, 16, 38, 32, 32}, + { 0, 98, 26, 34, 25, 34, 33, 32}, + { 0, 75, 61, 30, 31, 32, 33, 32}, + { 0, 36, 94, 32, 30, 33, 32, 32}, + { 0, 26, 76, 58, 30, 33, 32, 32}, + { 0, 30, 39, 91, 31, 32, 33, 31}, + { 0, 32, 23, 105, 32, 32, 32, 32}, + { 0, 34, 30, 33, 31, 52, 29, 32}, + { 0, 66, 24, 34, 11, 41, 33, 32}, + { 0, 97, 28, 34, 24, 34, 33, 32}, + { 0, 71, 65, 30, 30, 32, 33, 32}, + { 0, 34, 92, 35, 30, 33, 32, 32}, + { 0, 26, 70, 64, 29, 34, 32, 32}, + { 0, 30, 37, 94, 30, 33, 32, 31}, + { 0, 32, 23, 105, 31, 33, 33, 31}, + { 0, 37, 29, 33, 8, 79, 27, 32}, + { 0, 71, 22, 35, 5, 50, 32, 32}, + { 0, 98, 29, 34, 23, 34, 34, 32}, + { 0, 66, 70, 30, 31, 31, 33, 32}, + { 0, 31, 92, 38, 30, 33, 32, 32}, + { 0, 26, 66, 68, 29, 34, 32, 31}, + { 0, 30, 34, 97, 30, 34, 33, 31}, + { 0, 31, 22, 106, 30, 34, 33, 31}, + { 0, 40, 28, 34, 0, 76, 46, 28}, + { 0, 76, 21, 35, 0, 55, 35, 32}, + { 0, 97, 32, 34, 21, 37, 33, 33}, + { 0, 61, 75, 29, 30, 32, 32, 32}, + { 0, 29, 92, 40, 29, 33, 32, 32}, + { 0, 26, 62, 73, 29, 34, 32, 31}, + { 0, 29, 32, 99, 30, 34, 33, 30}, + { 0, 31, 22, 107, 30, 34, 33, 31}, + { 0, 42, 27, 34, 1, 48, 79, 25}, + { 0, 80, 20, 35, 0, 48, 47, 31}, + { 0, 94, 36, 32, 17, 40, 33, 33}, + { 0, 55, 80, 29, 27, 35, 31, 32}, + { 0, 27, 90, 43, 28, 34, 32, 31}, + { 0, 26, 58, 76, 29, 33, 33, 30}, + { 0, 29, 30, 101, 29, 34, 34, 30}, + { 0, 31, 21, 108, 29, 35, 34, 30}, + { 0, 44, 26, 34, 6, 30, 80, 40}, + { 0, 81, 21, 35, 0, 41, 52, 35}, + { 0, 90, 41, 31, 14, 41, 35, 33}, + { 0, 51, 82, 29, 24, 37, 32, 32}, + { 0, 27, 87, 47, 27, 35, 32, 31}, + { 0, 26, 54, 79, 29, 34, 33, 30}, + { 0, 29, 29, 102, 28, 34, 33, 30}, + { 0, 31, 21, 108, 28, 35, 33, 31}, + { 0, 47, 26, 34, 7, 34, 44, 75}, + { 0, 80, 24, 34, 0, 41, 41, 50}, + { 0, 84, 45, 31, 12, 40, 36, 36}, + { 0, 49, 81, 31, 22, 37, 33, 32}, + { 0, 28, 81, 51, 26, 35, 33, 31}, + { 0, 28, 51, 81, 28, 34, 33, 30}, + { 0, 29, 30, 101, 28, 35, 33, 31}, + { 0, 31, 22, 107, 28, 35, 33, 32}, + { 0, 48, 27, 34, 10, 40, 16, 97}, + { 0, 75, 27, 34, 3, 42, 26, 66}, + { 0, 77, 47, 33, 12, 40, 32, 43}, + { 0, 49, 75, 36, 21, 37, 33, 35}, + { 0, 
32, 72, 55, 25, 36, 33, 32}, + { 0, 30, 49, 81, 27, 35, 33, 31}, + { 0, 30, 32, 98, 28, 35, 32, 32}, + { 0, 31, 24, 104, 28, 35, 32, 33} + }, + { + { 0, 36, 29, 33, 43, 47, 29, 31}, + { 0, 74, 20, 35, 19, 47, 34, 32}, + { 0, 92, 35, 32, 29, 31, 40, 34}, + { 0, 53, 80, 26, 33, 28, 36, 37}, + { 0, 24, 91, 41, 31, 31, 31, 38}, + { 0, 25, 57, 74, 31, 32, 30, 37}, + { 0, 32, 28, 99, 32, 32, 29, 36}, + { 0, 34, 20, 105, 33, 32, 30, 35}, + { 0, 50, 26, 34, 33, 74, 30, 31}, + { 0, 75, 28, 33, 23, 46, 47, 33}, + { 0, 64, 58, 29, 30, 26, 46, 40}, + { 0, 31, 85, 37, 31, 27, 33, 44}, + { 0, 22, 67, 64, 30, 31, 28, 42}, + { 0, 29, 35, 93, 31, 32, 27, 40}, + { 0, 33, 20, 105, 32, 33, 27, 37}, + { 0, 34, 19, 106, 33, 32, 29, 36}, + { 0, 51, 29, 33, 25, 72, 51, 30}, + { 0, 61, 42, 31, 30, 31, 60, 39}, + { 0, 40, 70, 34, 32, 24, 41, 50}, + { 0, 22, 72, 54, 30, 31, 27, 50}, + { 0, 25, 44, 83, 30, 33, 25, 44}, + { 0, 32, 23, 102, 32, 33, 26, 40}, + { 0, 34, 18, 107, 32, 33, 28, 37}, + { 0, 34, 19, 105, 33, 32, 30, 35}, + { 0, 45, 35, 32, 30, 39, 79, 33}, + { 0, 43, 53, 33, 35, 24, 53, 55}, + { 0, 27, 67, 45, 32, 29, 27, 61}, + { 0, 22, 53, 72, 30, 33, 22, 52}, + { 0, 28, 31, 95, 31, 33, 25, 43}, + { 0, 32, 20, 105, 32, 33, 27, 38}, + { 0, 34, 18, 107, 32, 32, 29, 36}, + { 0, 34, 20, 105, 33, 31, 31, 35}, + { 0, 38, 40, 32, 35, 23, 72, 54}, + { 0, 31, 55, 39, 34, 29, 32, 73}, + { 0, 22, 57, 60, 31, 35, 18, 64}, + { 0, 25, 39, 86, 31, 35, 22, 49}, + { 0, 30, 24, 101, 32, 33, 27, 40}, + { 0, 33, 19, 106, 32, 32, 30, 36}, + { 0, 34, 18, 107, 33, 31, 31, 35}, + { 0, 34, 20, 104, 33, 31, 32, 34}, + { 0, 33, 42, 35, 34, 28, 39, 82}, + { 0, 26, 51, 50, 33, 34, 18, 80}, + { 0, 23, 46, 74, 31, 35, 20, 59}, + { 0, 27, 32, 93, 32, 34, 26, 44}, + { 0, 31, 22, 103, 32, 32, 30, 37}, + { 0, 33, 19, 106, 33, 31, 31, 35}, + { 0, 34, 19, 106, 33, 31, 32, 34}, + { 0, 35, 21, 103, 34, 31, 32, 34}, + { 0, 29, 41, 41, 33, 34, 20, 92}, + { 0, 24, 44, 62, 34, 35, 18, 73}, + { 0, 24, 37, 83, 34, 33, 25, 52}, + { 0, 28, 28, 97, 33, 32, 30, 40}, + { 0, 32, 23, 103, 33, 31, 32, 36}, + { 0, 34, 20, 105, 34, 30, 33, 34}, + { 0, 35, 20, 104, 34, 30, 33, 33}, + { 0, 35, 22, 102, 34, 30, 33, 34}, + { 0, 27, 38, 51, 34, 34, 20, 86}, + { 0, 26, 37, 71, 35, 34, 24, 64}, + { 0, 27, 33, 87, 35, 32, 30, 47}, + { 0, 30, 28, 96, 34, 31, 32, 39}, + { 0, 32, 24, 100, 35, 30, 32, 36}, + { 0, 34, 23, 101, 34, 30, 33, 34}, + { 0, 35, 23, 101, 34, 30, 32, 34}, + { 0, 34, 24, 99, 35, 30, 33, 34} + }, + { + { 0, 39, 30, 31, 67, 33, 34, 31}, + { 0, 72, 21, 32, 43, 39, 33, 31}, + { 0, 100, 23, 32, 35, 39, 34, 31}, + { 0, 75, 63, 24, 32, 38, 34, 32}, + { 0, 32, 98, 26, 29, 37, 35, 32}, + { 0, 22, 77, 55, 29, 36, 35, 31}, + { 0, 31, 37, 90, 31, 35, 35, 32}, + { 0, 35, 22, 100, 33, 33, 36, 33}, + { 0, 47, 29, 32, 74, 54, 32, 31}, + { 0, 71, 24, 32, 60, 50, 36, 30}, + { 0, 86, 31, 30, 46, 48, 37, 30}, + { 0, 65, 63, 25, 34, 46, 39, 30}, + { 0, 33, 85, 32, 28, 43, 40, 30}, + { 0, 26, 64, 60, 27, 39, 41, 30}, + { 0, 33, 33, 87, 29, 35, 41, 31}, + { 0, 37, 23, 93, 32, 33, 41, 32}, + { 0, 41, 32, 32, 45, 84, 32, 32}, + { 0, 55, 31, 32, 50, 70, 40, 30}, + { 0, 62, 37, 31, 45, 61, 45, 29}, + { 0, 53, 55, 31, 36, 55, 48, 29}, + { 0, 38, 63, 40, 29, 48, 50, 28}, + { 0, 34, 49, 60, 27, 43, 51, 29}, + { 0, 38, 30, 78, 28, 38, 50, 31}, + { 0, 40, 24, 83, 30, 36, 48, 33}, + { 0, 35, 33, 33, 29, 75, 58, 29}, + { 0, 39, 35, 33, 34, 68, 59, 29}, + { 0, 41, 39, 34, 36, 61, 62, 29}, + { 0, 41, 43, 37, 33, 54, 64, 28}, + { 0, 41, 43, 45, 30, 48, 65, 29}, + { 0, 42, 36, 56, 27, 44, 
63, 30}, + { 0, 42, 30, 65, 27, 41, 60, 33}, + { 0, 42, 28, 68, 28, 37, 56, 36}, + { 0, 33, 34, 33, 31, 42, 88, 30}, + { 0, 31, 36, 34, 31, 44, 84, 31}, + { 0, 31, 37, 35, 32, 43, 83, 31}, + { 0, 35, 35, 39, 32, 40, 82, 31}, + { 0, 40, 32, 44, 31, 38, 81, 31}, + { 0, 44, 30, 48, 30, 37, 78, 33}, + { 0, 44, 30, 52, 28, 37, 72, 36}, + { 0, 43, 30, 55, 29, 35, 66, 40}, + { 0, 32, 33, 33, 34, 25, 85, 48}, + { 0, 30, 34, 34, 33, 25, 88, 44}, + { 0, 30, 34, 36, 34, 25, 90, 41}, + { 0, 33, 32, 38, 34, 25, 90, 40}, + { 0, 38, 29, 41, 34, 26, 88, 40}, + { 0, 42, 29, 41, 33, 27, 85, 41}, + { 0, 43, 30, 42, 31, 28, 80, 43}, + { 0, 42, 31, 45, 31, 30, 72, 47}, + { 0, 32, 33, 33, 33, 26, 54, 79}, + { 0, 31, 32, 34, 35, 20, 68, 68}, + { 0, 32, 32, 35, 36, 17, 76, 62}, + { 0, 34, 31, 36, 36, 17, 79, 59}, + { 0, 37, 29, 37, 36, 18, 78, 58}, + { 0, 39, 29, 37, 35, 20, 77, 58}, + { 0, 41, 30, 37, 34, 22, 74, 58}, + { 0, 40, 31, 40, 32, 26, 68, 59}, + { 0, 33, 31, 34, 33, 29, 31, 98}, + { 0, 34, 30, 34, 35, 23, 45, 88}, + { 0, 34, 31, 34, 36, 20, 54, 82}, + { 0, 35, 31, 34, 36, 18, 59, 78}, + { 0, 36, 31, 34, 37, 19, 60, 76}, + { 0, 38, 30, 34, 36, 20, 61, 74}, + { 0, 39, 31, 35, 35, 22, 60, 73}, + { 0, 39, 31, 37, 34, 24, 59, 71} + }, + { + { 0, 30, 33, 32, 55, 32, 32, 32}, + { 0, 47, 30, 31, 29, 36, 32, 32}, + { 0, 81, 28, 32, 28, 34, 32, 32}, + { 0, 85, 46, 29, 32, 32, 33, 32}, + { 0, 54, 82, 26, 32, 32, 33, 32}, + { 0, 30, 90, 38, 31, 32, 33, 32}, + { 0, 30, 56, 73, 31, 33, 32, 32}, + { 0, 37, 21, 102, 32, 32, 32, 32}, + { 0, 33, 32, 31, 68, 39, 31, 31}, + { 0, 38, 32, 31, 43, 34, 33, 31}, + { 0, 63, 30, 31, 29, 34, 32, 32}, + { 0, 82, 37, 30, 29, 33, 32, 32}, + { 0, 71, 63, 27, 31, 32, 33, 32}, + { 0, 44, 86, 30, 30, 33, 33, 32}, + { 0, 33, 72, 55, 30, 32, 32, 31}, + { 0, 37, 37, 86, 31, 32, 33, 31}, + { 0, 34, 33, 32, 60, 61, 29, 32}, + { 0, 36, 33, 31, 56, 38, 32, 31}, + { 0, 51, 30, 31, 38, 33, 33, 32}, + { 0, 75, 31, 31, 30, 33, 33, 32}, + { 0, 80, 47, 29, 30, 32, 33, 31}, + { 0, 60, 73, 27, 30, 33, 33, 31}, + { 0, 41, 78, 41, 30, 33, 32, 31}, + { 0, 38, 53, 68, 30, 32, 33, 31}, + { 0, 33, 33, 32, 43, 77, 35, 30}, + { 0, 35, 33, 31, 55, 54, 29, 32}, + { 0, 43, 32, 31, 46, 39, 31, 32}, + { 0, 64, 30, 31, 35, 34, 33, 32}, + { 0, 79, 37, 30, 31, 32, 33, 31}, + { 0, 73, 57, 28, 30, 32, 33, 31}, + { 0, 54, 73, 33, 30, 32, 33, 31}, + { 0, 43, 64, 52, 30, 32, 33, 31}, + { 0, 33, 33, 32, 34, 68, 58, 28}, + { 0, 34, 33, 31, 45, 70, 33, 31}, + { 0, 38, 33, 31, 48, 52, 29, 32}, + { 0, 54, 31, 31, 40, 39, 31, 32}, + { 0, 73, 32, 31, 34, 34, 33, 31}, + { 0, 77, 45, 29, 31, 32, 32, 32}, + { 0, 65, 63, 30, 31, 31, 33, 31}, + { 0, 51, 66, 42, 30, 32, 33, 31}, + { 0, 33, 32, 32, 34, 44, 81, 31}, + { 0, 34, 33, 31, 38, 66, 52, 28}, + { 0, 36, 33, 30, 44, 62, 34, 31}, + { 0, 47, 31, 31, 43, 48, 30, 32}, + { 0, 64, 31, 31, 38, 38, 32, 32}, + { 0, 75, 38, 30, 33, 34, 32, 32}, + { 0, 71, 53, 30, 31, 32, 33, 32}, + { 0, 59, 61, 37, 30, 32, 33, 32}, + { 0, 33, 32, 31, 35, 31, 71, 54}, + { 0, 34, 33, 31, 37, 49, 70, 33}, + { 0, 36, 33, 31, 41, 60, 48, 30}, + { 0, 43, 32, 31, 43, 54, 35, 31}, + { 0, 56, 31, 31, 40, 44, 32, 32}, + { 0, 68, 35, 30, 36, 37, 32, 32}, + { 0, 70, 45, 30, 33, 34, 33, 32}, + { 0, 63, 55, 35, 31, 33, 33, 32}, + { 0, 33, 32, 31, 33, 34, 36, 87}, + { 0, 34, 32, 31, 36, 38, 62, 52}, + { 0, 36, 33, 31, 39, 50, 57, 36}, + { 0, 41, 33, 31, 41, 53, 43, 33}, + { 0, 50, 33, 31, 41, 48, 36, 32}, + { 0, 59, 35, 31, 37, 41, 34, 32}, + { 0, 65, 42, 31, 35, 36, 33, 32}, + { 0, 62, 49, 35, 33, 34, 34, 33} + } +}; + 
diff --git a/src/strategies/strategies-intra.c b/src/strategies/strategies-intra.c index d12b37f7..e1f82d92 100644 --- a/src/strategies/strategies-intra.c +++ b/src/strategies/strategies-intra.c @@ -42,6 +42,7 @@ angular_pred_func *uvg_angular_pred; intra_pred_planar_func *uvg_intra_pred_planar; intra_pred_filtered_dc_func *uvg_intra_pred_filtered_dc; pdpc_planar_dc_func *uvg_pdpc_planar_dc; +mip_pred_func *uvg_mip_predict; int uvg_strategy_register_intra(void* opaque, uint8_t bitdepth) { bool success = true; diff --git a/src/strategies/strategies-intra.h b/src/strategies/strategies-intra.h index 52f5e519..eeec4b09 100644 --- a/src/strategies/strategies-intra.h +++ b/src/strategies/strategies-intra.h @@ -76,11 +76,20 @@ typedef void (pdpc_planar_dc_func)( const uvg_intra_ref *const used_ref, uvg_pixel *const dst); +typedef void(mip_pred_func)( + const uvg_intra_references * const refs, + const uint16_t pred_block_width, + const uint16_t pred_block_height, + uvg_pixel *dst, + const int mip_mode, + const bool mip_transp); + // Declare function pointers. extern angular_pred_func * uvg_angular_pred; extern intra_pred_planar_func * uvg_intra_pred_planar; extern intra_pred_filtered_dc_func * uvg_intra_pred_filtered_dc; extern pdpc_planar_dc_func * uvg_pdpc_planar_dc; +extern mip_pred_func *uvg_mip_predict; int uvg_strategy_register_intra(void* opaque, uint8_t bitdepth); @@ -90,6 +99,7 @@ int uvg_strategy_register_intra(void* opaque, uint8_t bitdepth); {"intra_pred_planar", (void**) &uvg_intra_pred_planar}, \ {"intra_pred_filtered_dc", (void**) &uvg_intra_pred_filtered_dc}, \ {"pdpc_planar_dc", (void**) &uvg_pdpc_planar_dc}, \ + {"mip_predict", (void**) &uvg_mip_predict},