diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index b6503d808d..3db29e848c 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -320,12 +320,18 @@ else () message (" Wakeup of blocking operations enabled") endif () if (WAMR_BUILD_SIMD EQUAL 1) - if (NOT WAMR_BUILD_TARGET MATCHES "RISCV64.*") - add_definitions (-DWASM_ENABLE_SIMD=1) - message (" SIMD enabled") - else () + set(SIMD_ENABLED 0) + if (WAMR_BUILD_TARGET MATCHES "RISCV64.*") + set(WAMR_BUILD_SIMD 0) message (" SIMD disabled due to not supported on target RISCV64") + elseif (WAMR_BUILD_FAST_INTERP EQUAL 1 AND WAMR_BUILD_SIMDE EQUAL 0) + set(WAMR_BUILD_SIMD 0) + message(" SIMD disabled as the simde is not built in fast interpreter mode") + else() + set(SIMD_ENABLED 1) + message (" SIMD enabled") endif () + add_definitions(-DWASM_ENABLE_SIMD=${SIMD_ENABLED}) endif () if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1) add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1) diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 29789d671c..ec3a370d61 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -155,8 +155,14 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () -if (WAMR_BUILD_LIB_SIMDE EQUAL 1) - include (${IWASM_DIR}/libraries/simde/simde.cmake) +if (WAMR_BUILD_SIMD EQUAL 1 AND WAMR_BUILD_FAST_INTERP EQUAL 1) + if (NOT (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*")) + message(STATUS "SIMDe doesnt support platform " ${WAMR_BUILD_TARGET}) + set(WAMR_BUILD_SIMDE 0) + else() + include (${IWASM_DIR}/libraries/simde/simde.cmake) + set (WAMR_BUILD_SIMDE 1) + endif() endif () if (WAMR_BUILD_WASM_CACHE EQUAL 1) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index cd9dd91eb8..6dc9351e03 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -1699,6 +1699,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_OPERAND(uint64, I64, off)); ret_offset += 2; } + else if (ret_types[ret_idx] == VALUE_TYPE_V128) { + PUT_V128_TO_ADDR(prev_frame->lp + ret_offset, + GET_OPERAND_V128(off)); + ret_offset += 4; + } #if WASM_ENABLE_GC != 0 else if (wasm_is_type_reftype(ret_types[ret_idx])) { PUT_REF_TO_ADDR(prev_frame->lp + ret_offset, @@ -3536,6 +3541,24 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } +#if WASM_ENABLE_SIMD != 0 + HANDLE_OP(EXT_OP_SET_LOCAL_FAST_V128) + HANDLE_OP(EXT_OP_TEE_LOCAL_FAST_V128) + { + /* clang-format off */ +#if WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 + local_offset = *frame_ip++; +#else + local_offset = *frame_ip; + frame_ip += 2; +#endif + /* clang-format on */ + PUT_V128_TO_ADDR((uint32 *)(frame_lp + local_offset), + GET_OPERAND_V128(0)); + frame_ip += 2; + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_GET_GLOBAL) { global_idx = read_uint32(frame_ip); @@ -3572,7 +3595,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } - +#if WASM_ENABLE_SIMD != 0 + HANDLE_OP(WASM_OP_GET_GLOBAL_V128) + { + global_idx = read_uint32(frame_ip); + bh_assert(global_idx < module->e->global_count); + global = globals + global_idx; + global_addr = get_global_addr(global_data, global); + addr_ret = GET_OFFSET(); + PUT_V128_TO_ADDR(frame_lp + addr_ret, + GET_V128_FROM_ADDR((uint32 *)global_addr)); + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_SET_GLOBAL) { global_idx = read_uint32(frame_ip); @@ -3639,6 +3674,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR(frame_lp + addr1)); HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(WASM_OP_SET_GLOBAL_V128) + { + global_idx = read_uint32(frame_ip); + bh_assert(global_idx < module->e->global_count); + global = globals + global_idx; + global_addr = get_global_addr(global_data, global); + addr1 = GET_OFFSET(); + PUT_V128_TO_ADDR((uint32 *)global_addr, + GET_V128_FROM_ADDR(frame_lp + addr1)); + HANDLE_OP_END(); + } +#endif /* memory load instructions */ HANDLE_OP(WASM_OP_I32_LOAD) @@ -4884,6 +4932,28 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } +#if WASM_ENABLE_SIMD != 0 + HANDLE_OP(EXT_OP_COPY_STACK_TOP_V128) + { + addr1 = GET_OFFSET(); + addr2 = GET_OFFSET(); + + PUT_V128_TO_ADDR(frame_lp + addr2, + GET_V128_FROM_ADDR(frame_lp + addr1)); + +#if WASM_ENABLE_GC != 0 + /* Ignore constants because they are not reference */ + if (addr1 >= 0) { + if (*FRAME_REF(addr1)) { + CLEAR_FRAME_REF(addr1); + SET_FRAME_REF(addr2); + } + } +#endif + + HANDLE_OP_END(); + } +#endif HANDLE_OP(EXT_OP_COPY_STACK_VALUES) { @@ -5773,73 +5843,60 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); break; } -#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ - do { \ - uint32 offset, addr; \ - offset = read_uint32(frame_ip); \ - addr = GET_OPERAND(uint32, I32, 0); \ - frame_ip += 2; \ - addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ - \ - simde_v128_t simde_result = simde_func(maddr); \ - \ - V128 result; \ - SIMDE_V128_TO_SIMD_V128(simde_result, result); \ - \ - V128 reversed_result; \ - for (int i = 0; i < num_elements; i++) { \ - reversed_result.i##element_size##x##num_elements[i] = \ - result.i##element_size##x##num_elements[num_elements - 1 - i]; \ - } \ - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); \ - \ - break; \ +#define SIMD_LOAD_OP(simde_func, element_size, num_elements) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(16); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + \ } while (0) case SIMD_v128_load8x8_s: { - SIMD_LOAD_OP(SIMD_v128_load8x8_s, - simde_wasm_i16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_i16x8_load8x8, 16, 8); break; } case SIMD_v128_load8x8_u: { - SIMD_LOAD_OP(SIMD_v128_load8x8_u, - simde_wasm_u16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_u16x8_load8x8, 16, 8); break; } case SIMD_v128_load16x4_s: { - SIMD_LOAD_OP(SIMD_v128_load16x4_s, - simde_wasm_i32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_i32x4_load16x4, 32, 4); break; } case SIMD_v128_load16x4_u: { - SIMD_LOAD_OP(SIMD_v128_load16x4_u, - simde_wasm_u32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_u32x4_load16x4, 32, 4); break; } case SIMD_v128_load32x2_s: { - SIMD_LOAD_OP(SIMD_v128_load32x2_s, - simde_wasm_i64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_i64x2_load32x2, 64, 2); break; } case SIMD_v128_load32x2_u: { - SIMD_LOAD_OP(SIMD_v128_load32x2_u, - simde_wasm_u64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_u64x2_load32x2, 64, 2); break; } -#define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \ +#define SIMD_LOAD_SPLAT_OP(simde_func) \ do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ + CHECK_MEMORY_OVERFLOW(4); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ @@ -5851,26 +5908,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_load8_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat, - simde_wasm_v128_load8_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load8_splat); break; } case SIMD_v128_load16_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat, - simde_wasm_v128_load16_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load16_splat); break; } case SIMD_v128_load32_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat, - simde_wasm_v128_load32_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load32_splat); break; } case SIMD_v128_load64_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat, - simde_wasm_v128_load64_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load64_splat); break; } case SIMD_v128_store: @@ -5903,14 +5956,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v8x16_shuffle: { V128 indices; - V128 v2 = POP_V128(); - V128 v1 = POP_V128(); - addr_ret = GET_OFFSET(); - bh_memcpy_s(&indices, sizeof(V128), frame_ip, sizeof(V128)); frame_ip += sizeof(V128); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + V128 result; for (int i = 0; i < 16; i++) { uint8_t index = indices.i8x16[i]; @@ -5938,6 +5991,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMDE_V128_TO_SIMD_V128(simde_result, result); PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; } /* Splat */ @@ -5963,7 +6017,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_i8x16_splat: { - SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); + uint32 val = POP_I32(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_i8x16_splat(val); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i16x8_splat: @@ -6079,8 +6141,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #define SIMD_DOUBLE_OP(simde_func) \ do { \ - V128 v1 = POP_V128(); \ V128 v2 = POP_V128(); \ + V128 v1 = POP_V128(); \ addr_ret = GET_OFFSET(); \ \ simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \ @@ -6095,7 +6157,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = + simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i8x16_ne: @@ -7480,8 +7553,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } for (i = 0; i < cur_func->param_count; i++) { - if (cur_func->param_types[i] == VALUE_TYPE_I64 - || cur_func->param_types[i] == VALUE_TYPE_F64) { + if (cur_func->param_types[i] == VALUE_TYPE_V128) { + PUT_V128_TO_ADDR( + outs_area->lp, + GET_OPERAND_V128(2 * (cur_func->param_count - i - 1))); + outs_area->lp += 4; + } + else if (cur_func->param_types[i] == VALUE_TYPE_I64 + || cur_func->param_types[i] == VALUE_TYPE_F64) { PUT_I64_TO_ADDR( outs_area->lp, GET_OPERAND(uint64, I64, diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a39dff89dc..eaba5555b0 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7300,6 +7300,10 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_SET_GLOBAL: case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: +#if WASM_ENABLE_SIMDE != 0 + case WASM_OP_GET_GLOBAL_V128: + case WASM_OP_SET_GLOBAL_V128: +#endif case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ break; @@ -9088,6 +9092,7 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, bool *preserved, char *error_buf, uint32 error_buf_size) { + uint32 i = 0; int16 preserved_offset = (int16)local_index; @@ -9111,6 +9116,13 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->preserved_local_offset++; emit_label(EXT_OP_COPY_STACK_TOP); } +#if WASM_ENABLE_SIMDE != 0 + else if (local_type == VALUE_TYPE_V128) { + if (loader_ctx->p_code_compiled) + loader_ctx->preserved_local_offset += 4; + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } +#endif else { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 2; @@ -9123,10 +9135,15 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->frame_offset_bottom[i] = preserved_offset; } - if (is_32bit_type(cur_type)) + if (cur_type == VALUE_TYPE_V128) { + i += 4; + } + else if (is_32bit_type(cur_type)) { i++; - else + } + else { i += 2; + } } (void)error_buf; @@ -9155,7 +9172,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode, return false; } - if (is_32bit_type(cur_type)) { + if (cur_type == VALUE_TYPE_V128) { + i += 4; + } + else if (is_32bit_type(cur_type)) { i++; } else { @@ -9498,6 +9518,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, || (type == VALUE_TYPE_EXTERNREF && *(int32 *)value == c->value.i32) #endif + || (type == VALUE_TYPE_V128 + && (0 == memcmp(value, &(c->value.v128), sizeof(V128)))) || (type == VALUE_TYPE_F64 && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) || (type == VALUE_TYPE_F32 @@ -9508,6 +9530,9 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } if (is_32bit_type(c->value_type)) operand_offset += 1; + else if (c->value_type == VALUE_TYPE_V128) { + operand_offset += 4; + } else operand_offset += 2; } @@ -9559,6 +9584,11 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, c->value.i32 = *(int32 *)value; ctx->const_cell_num++; break; + case VALUE_TYPE_V128: + bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, + sizeof(V128)); + ctx->const_cell_num++; + break; #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_FUNCREF: @@ -9760,17 +9790,23 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, block_type, &return_types, &reftype_maps, &reftype_map_count); #endif - /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead - * of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ + /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/V128 + * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ if (return_count == 1) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]); - if (cell <= 2 /* V128 isn't supported whose cell num is 4 */ - && block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { + if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { /* insert op_copy before else opcode */ if (opcode == WASM_OP_ELSE) skip_label(); - emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP - : EXT_OP_COPY_STACK_TOP_I64); +#if WASM_ENABLE_SIMDE != 0 + if (cell == 4) { + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } +#endif + if (cell <= 2) { + emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP + : EXT_OP_COPY_STACK_TOP_I64); + } emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell)); emit_operand(loader_ctx, block->dynamic_offset); @@ -9805,11 +9841,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, for (i = (int32)return_count - 1; i >= 0; i--) { uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cells; - dynamic_offset -= cells; - if (dynamic_offset != *frame_offset) { - value_count++; - total_cel_num += cells; + if (frame_offset - cells < loader_ctx->frame_offset_bottom) { + set_error_buf(error_buf, error_buf_size, "frame offset underflow"); + goto fail; + } + + if (cells == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cells; + + for (int j = 0; j < 4; j++) { + if (*(frame_offset - j - 1) != (v128_dynamic + j)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + value_count++; + total_cel_num += cells; + } + + frame_offset -= cells; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cells; + dynamic_offset -= cells; + if (dynamic_offset != *frame_offset) { + value_count++; + total_cel_num += cells; + } } } @@ -9845,19 +9907,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, dynamic_offset = dynamic_offset_org; for (i = (int32)return_count - 1, j = 0; i >= 0; i--) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cell; - dynamic_offset -= cell; - if (dynamic_offset != *frame_offset) { - /* cell num */ - cells[j] = cell; - /* src offset */ - src_offsets[j] = *frame_offset; - /* dst offset */ - dst_offsets[j] = dynamic_offset; - j++; + + if (cell == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cell; + + for (int k = 0; k < 4; k++) { + if (*(frame_offset - k - 1) != (v128_dynamic + k)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + cells[j] = cell; + src_offsets[j] = *(frame_offset - cell); + dst_offsets[j] = v128_dynamic; + j++; + } + + frame_offset -= cell; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cell; + dynamic_offset -= cell; + if (dynamic_offset != *frame_offset) { + cells[j] = cell; + /* src offset */ + src_offsets[j] = *frame_offset; + /* dst offset */ + dst_offsets[j] = dynamic_offset; + j++; + } } + if (opcode == WASM_OP_ELSE) { - *frame_offset = dynamic_offset; + if (cell == 4) { + for (int k = 0; k < cell; k++) { + *(frame_offset + k) = dynamic_offset + k; + } + } + else { + *frame_offset = dynamic_offset; + } } else { loader_ctx->frame_offset = frame_offset; @@ -12944,10 +13037,21 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_SET_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } - else { + else if (is_64bit_type(local_type)) { emit_label(EXT_OP_SET_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); } +#if WASM_ENABLE_SIMDE != 0 + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_SET_LOCAL_FAST_V128); + emit_byte(loader_ctx, (uint8)local_offset); + } +#endif + else { + set_error_buf(error_buf, error_buf_size, + "unknown local type"); + goto fail; + } POP_OFFSET_TYPE(local_type); } } @@ -13020,6 +13124,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_TEE_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } +#if WASM_ENABLE_SIMDE != 0 + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_TEE_LOCAL_FAST_V128); + emit_byte(loader_ctx, (uint8)local_offset); + } +#endif else { emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); @@ -13108,12 +13218,18 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #endif *p_org = WASM_OP_GET_GLOBAL_64; } -#else /* else of WASM_ENABLE_FAST_INTERP */ +#else /* else of WASM_ENABLE_FAST_INTERP */ if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { skip_label(); emit_label(WASM_OP_GET_GLOBAL_64); } +#if WASM_ENABLE_SIMDE != 0 + if (global_type == VALUE_TYPE_V128) { + skip_label(); + emit_label(WASM_OP_GET_GLOBAL_V128); + } +#endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); #endif /* end of WASM_ENABLE_FAST_INTERP */ @@ -13197,7 +13313,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, func->has_op_set_global_aux_stack = true; #endif } -#else /* else of WASM_ENABLE_FAST_INTERP */ +#else /* else of WASM_ENABLE_FAST_INTERP */ if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { skip_label(); @@ -13208,6 +13324,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_SET_GLOBAL_AUX_STACK); } +#if WASM_ENABLE_SIMDE != 0 + else if (global_type == VALUE_TYPE_V128) { + skip_label(); + emit_label(WASM_OP_SET_GLOBAL_V128); + } +#endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); POP_OFFSET_TYPE(global_type); #endif /* end of WASM_ENABLE_FAST_INTERP */ diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 75d30c9b31..9660bb1236 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,6 +278,15 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif +#if WASM_ENABLE_JIT != 0 \ + || WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 + EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, + EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, + EXT_OP_COPY_STACK_TOP_V128 = 0xdf, + WASM_OP_GET_GLOBAL_V128 = 0xe0, + WASM_OP_SET_GLOBAL_V128 = 0xe1, +#endif + /* Post-MVP extend op prefix */ WASM_OP_GC_PREFIX = 0xfb, WASM_OP_MISC_PREFIX = 0xfc, @@ -781,8 +790,7 @@ typedef enum WASMAtomicEXTOpcode { #endif #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), @@ -790,6 +798,17 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif +#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 +#define DEF_EXT_V128_HANDLE() \ + SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), /* 0xdd */ \ + SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), /* 0xde */ \ + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), /* 0xdf */ \ + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), /* 0xe0 */ \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), /* 0xe1 */ + +#else +#define DEF_EXT_V128_HANDLE() +#endif /* * Macro used to generate computed goto tables for the C interpreter. */ @@ -1021,7 +1040,7 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(WASM_OP_MISC_PREFIX), /* 0xfc */ \ SET_GOTO_TABLE_SIMD_PREFIX_ELEM() /* 0xfd */ \ SET_GOTO_TABLE_ELEM(WASM_OP_ATOMIC_PREFIX), /* 0xfe */ \ - DEF_DEBUG_BREAK_HANDLE() \ + DEF_DEBUG_BREAK_HANDLE() DEF_EXT_V128_HANDLE() \ }; #ifdef __cplusplus diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index b36e356945..eeb0e8d1f2 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -4,9 +4,7 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") - add_definitions (-DWASM_ENABLE_SIMDE=1) -endif () +add_definitions (-DWASM_ENABLE_SIMDE=1) include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde)