From 2ad14c80e069b8a8c277313e5bef02db6ba93e8e Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Fri, 3 Jan 2025 14:24:36 +0000 Subject: [PATCH 01/13] implement local and function calls for v128 in the fast interpreter --- core/iwasm/interpreter/wasm_interp_fast.c | 58 +++++++++++++++++++++-- core/iwasm/interpreter/wasm_loader.c | 13 ++++- core/iwasm/interpreter/wasm_opcode.h | 19 +++++++- 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index cd9dd91eb8..7015635056 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -1699,6 +1699,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_OPERAND(uint64, I64, off)); ret_offset += 2; } + else if (ret_types[ret_idx] == VALUE_TYPE_V128) { + PUT_V128_TO_ADDR(prev_frame->lp + ret_offset, + GET_OPERAND_V128(off)); + ret_offset += 4; + } #if WASM_ENABLE_GC != 0 else if (wasm_is_type_reftype(ret_types[ret_idx])) { PUT_REF_TO_ADDR(prev_frame->lp + ret_offset, @@ -3536,6 +3541,24 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(EXT_OP_SET_LOCAL_FAST_V128) + HANDLE_OP(EXT_OP_TEE_LOCAL_FAST_V128) + { + /* clang-format off */ +#if WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 + local_offset = *frame_ip++; +#else + local_offset = *frame_ip; + frame_ip += 2; +#endif + /* clang-format on */ + PUT_V128_TO_ADDR((uint32 *)(frame_lp + local_offset), + GET_OPERAND_V128(0)); + frame_ip += 2; + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_GET_GLOBAL) { global_idx = read_uint32(frame_ip); @@ -4884,6 +4907,28 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(EXT_OP_COPY_STACK_TOP_V128) + { + addr1 = GET_OFFSET(); + addr2 = GET_OFFSET(); + + PUT_V128_TO_ADDR(frame_lp + addr2, + GET_V128_FROM_ADDR(frame_lp + addr1)); + +#if WASM_ENABLE_GC != 0 + /* Ignore constants because they are not reference */ + if (addr1 >= 0) { + if (*FRAME_REF(addr1)) { + CLEAR_FRAME_REF(addr1); + SET_FRAME_REF(addr2); + } + } +#endif + + HANDLE_OP_END(); + } +#endif HANDLE_OP(EXT_OP_COPY_STACK_VALUES) { @@ -6079,8 +6124,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #define SIMD_DOUBLE_OP(simde_func) \ do { \ - V128 v1 = POP_V128(); \ V128 v2 = POP_V128(); \ + V128 v1 = POP_V128(); \ addr_ret = GET_OFFSET(); \ \ simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \ @@ -6946,6 +6991,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_add: { + SIMD_DOUBLE_OP(simde_wasm_i32x4_add); break; } @@ -7480,8 +7526,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } for (i = 0; i < cur_func->param_count; i++) { - if (cur_func->param_types[i] == VALUE_TYPE_I64 - || cur_func->param_types[i] == VALUE_TYPE_F64) { + if (cur_func->param_types[i] == VALUE_TYPE_V128) { + PUT_V128_TO_ADDR( + outs_area->lp, + GET_OPERAND_V128(2 * (cur_func->param_count - i - 1))); + outs_area->lp += 4; + } + else if (cur_func->param_types[i] == VALUE_TYPE_I64 + || cur_func->param_types[i] == VALUE_TYPE_F64) { PUT_I64_TO_ADDR( outs_area->lp, GET_OPERAND(uint64, I64, diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a39dff89dc..d7bd34fde7 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -12944,10 +12944,21 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_SET_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } - else { + else if (is_64bit_type(local_type)) { emit_label(EXT_OP_SET_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); } +#if WASM_ENABLE_SIMDE != 0 + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_SET_LOCAL_FAST_V128); + emit_byte(loader_ctx, (uint8)local_offset); + } +#endif + else { + set_error_buf(error_buf, error_buf_size, + "unknown local type"); + goto fail; + } POP_OFFSET_TYPE(local_type); } } diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 75d30c9b31..c3c5e00f80 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,6 +278,14 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ + && WASM_ENABLE_SIMD != 0 + EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, + EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, + EXT_OP_COPY_STACK_TOP_V128 = 0xdf, +#endif + /* Post-MVP extend op prefix */ WASM_OP_GC_PREFIX = 0xfb, WASM_OP_MISC_PREFIX = 0xfc, @@ -790,6 +798,15 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif +#if (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0) \ + && WASM_ENABLE_SIMD != 0 +#define DEF_EXT_V128_HANDLE() \ + SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ + SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), +#else +#define DEF_EXT_V128_HANDLE() +#endif /* * Macro used to generate computed goto tables for the C interpreter. */ @@ -1021,7 +1038,7 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(WASM_OP_MISC_PREFIX), /* 0xfc */ \ SET_GOTO_TABLE_SIMD_PREFIX_ELEM() /* 0xfd */ \ SET_GOTO_TABLE_ELEM(WASM_OP_ATOMIC_PREFIX), /* 0xfe */ \ - DEF_DEBUG_BREAK_HANDLE() \ + DEF_DEBUG_BREAK_HANDLE() DEF_EXT_V128_HANDLE() \ }; #ifdef __cplusplus From 4cb4067b7d2c73e1f8b4d4b2bda2f46e5646b732 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Tue, 21 Jan 2025 18:23:58 +0000 Subject: [PATCH 02/13] Fix splat opcodes, add V128 handling in preserve_referenced_local and reserve_block_ret --- core/iwasm/interpreter/wasm_interp_fast.c | 45 +++++--- core/iwasm/interpreter/wasm_loader.c | 127 ++++++++++++++++++---- 2 files changed, 136 insertions(+), 36 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 7015635056..9a026428f3 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -47,7 +47,7 @@ typedef float64 CellType_F64; && (app_addr) <= shared_heap_end_off - bytes + 1) #define shared_heap_addr_app_to_native(app_addr, native_addr) \ - native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off) + native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off) #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \ if (app_addr_in_shared_heap(app_addr, bytes)) \ @@ -1793,7 +1793,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, else cur_func_type = cur_func->u.func->func_type; - /* clang-format off */ + /* clang-format off */ #if WASM_ENABLE_GC == 0 if (cur_type != cur_func_type) { wasm_set_exception(module, "indirect call type mismatch"); @@ -5923,12 +5923,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; offset = read_uint32(frame_ip); V128 data = POP_V128(); - addr = POP_I32(); + int32 base = POP_I32(); + offset += base; + addr = GET_OPERAND(uint32, I32, 0); - V128 data; - data = POP_V128(); - - CHECK_MEMORY_OVERFLOW(16); + CHECK_MEMORY_OVERFLOW(32); STORE_V128(maddr, data); break; } @@ -5948,14 +5947,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v8x16_shuffle: { V128 indices; - V128 v2 = POP_V128(); - V128 v1 = POP_V128(); - addr_ret = GET_OFFSET(); - bh_memcpy_s(&indices, sizeof(V128), frame_ip, sizeof(V128)); frame_ip += sizeof(V128); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + V128 result; for (int i = 0; i < 16; i++) { uint8_t index = indices.i8x16[i]; @@ -5983,6 +5982,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMDE_V128_TO_SIMD_V128(simde_result, result); PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; } /* Splat */ @@ -6008,7 +6008,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_i8x16_splat: { - SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); + uint32 val = POP_I32(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_i8x16_splat(val); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i16x8_splat: @@ -6140,7 +6148,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = + simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i8x16_ne: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index d7bd34fde7..e19a648da3 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -9125,6 +9125,9 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, if (is_32bit_type(cur_type)) i++; + else if (cur_type == VALUE_TYPE_V128) { + i += 4; + } else i += 2; } @@ -9155,7 +9158,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode, return false; } - if (is_32bit_type(cur_type)) { + if (cur_type == VALUE_TYPE_V128) { + i += 4; + } + else if (is_32bit_type(cur_type)) { i++; } else { @@ -9498,6 +9504,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, || (type == VALUE_TYPE_EXTERNREF && *(int32 *)value == c->value.i32) #endif + || (type == VALUE_TYPE_V128 + && (0 == memcmp(value, &(c->value.v128), sizeof(V128)))) || (type == VALUE_TYPE_F64 && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) || (type == VALUE_TYPE_F32 @@ -9508,6 +9516,9 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } if (is_32bit_type(c->value_type)) operand_offset += 1; + else if (c->value_type == VALUE_TYPE_V128) { + operand_offset += 4; + } else operand_offset += 2; } @@ -9559,6 +9570,10 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, c->value.i32 = *(int32 *)value; ctx->const_cell_num++; break; + case VALUE_TYPE_V128: + bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, + sizeof(V128)); + ctx->const_cell_num++; #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_FUNCREF: @@ -9760,17 +9775,22 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, block_type, &return_types, &reftype_maps, &reftype_map_count); #endif - /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead - * of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ + /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/V128 + * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ if (return_count == 1) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]); - if (cell <= 2 /* V128 isn't supported whose cell num is 4 */ - && block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { + if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { /* insert op_copy before else opcode */ if (opcode == WASM_OP_ELSE) skip_label(); - emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP - : EXT_OP_COPY_STACK_TOP_I64); + + if (cell == 4) { + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } + else { + emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP + : EXT_OP_COPY_STACK_TOP_I64); + } emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell)); emit_operand(loader_ctx, block->dynamic_offset); @@ -9805,11 +9825,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, for (i = (int32)return_count - 1; i >= 0; i--) { uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cells; - dynamic_offset -= cells; - if (dynamic_offset != *frame_offset) { - value_count++; - total_cel_num += cells; + if (frame_offset - cells < loader_ctx->frame_offset_bottom) { + set_error_buf(error_buf, error_buf_size, "frame offset underflow"); + goto fail; + } + + if (cells == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cells; + + for (int j = 0; j < 4; j++) { + if (*(frame_offset - j - 1) != (v128_dynamic + j)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + value_count++; + total_cel_num += cells; + } + + frame_offset -= cells; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cells; + dynamic_offset -= cells; + if (dynamic_offset != *frame_offset) { + value_count++; + total_cel_num += cells; + } } } @@ -9845,19 +9891,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, dynamic_offset = dynamic_offset_org; for (i = (int32)return_count - 1, j = 0; i >= 0; i--) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cell; - dynamic_offset -= cell; - if (dynamic_offset != *frame_offset) { - /* cell num */ - cells[j] = cell; - /* src offset */ - src_offsets[j] = *frame_offset; - /* dst offset */ - dst_offsets[j] = dynamic_offset; - j++; + + if (cell == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cell; + + for (int k = 0; k < 4; k++) { + if (*(frame_offset - k - 1) != (v128_dynamic + k)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + cells[j] = cell; + src_offsets[j] = *(frame_offset - cell); + dst_offsets[j] = v128_dynamic; + j++; + } + + frame_offset -= cell; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cell; + dynamic_offset -= cell; + if (dynamic_offset != *frame_offset) { + cells[j] = cell; + /* src offset */ + src_offsets[j] = *frame_offset; + /* dst offset */ + dst_offsets[j] = dynamic_offset; + j++; + } } + if (opcode == WASM_OP_ELSE) { - *frame_offset = dynamic_offset; + if (cell == 4) { + for (int k = 0; k < cell; k++) { + *(frame_offset + k) = dynamic_offset + k; + } + } + else { + *frame_offset = dynamic_offset; + } } else { loader_ctx->frame_offset = frame_offset; @@ -13031,6 +13108,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_TEE_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_TEE_LOCAL_FAST_V128); + emit_byte(loader_ctx, (uint8)local_offset); + } else { emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); From ab6c51169d5e3bcc6ee892dbdc66e9b7c887a702 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 21 Jan 2025 19:16:13 +0000 Subject: [PATCH 03/13] implement globals --- core/iwasm/interpreter/wasm_interp_fast.c | 27 ++++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 20 ++++++++++++++++- core/iwasm/interpreter/wasm_opcode.h | 7 +++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 9a026428f3..d972fee233 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3595,7 +3595,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } - +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(WASM_OP_GET_GLOBAL_128) + { + global_idx = read_uint32(frame_ip); + bh_assert(global_idx < module->e->global_count); + global = globals + global_idx; + global_addr = get_global_addr(global_data, global); + addr_ret = GET_OFFSET(); + PUT_V128_TO_ADDR(frame_lp + addr_ret, + GET_V128_FROM_ADDR((uint32 *)global_addr)); + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_SET_GLOBAL) { global_idx = read_uint32(frame_ip); @@ -3662,6 +3674,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR(frame_lp + addr1)); HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(WASM_OP_SET_GLOBAL_128) + { + global_idx = read_uint32(frame_ip); + bh_assert(global_idx < module->e->global_count); + global = globals + global_idx; + global_addr = get_global_addr(global_data, global); + addr1 = GET_OFFSET(); + PUT_V128_TO_ADDR((uint32 *)global_addr, + GET_V128_FROM_ADDR(frame_lp + addr1)); + HANDLE_OP_END(); + } +#endif /* memory load instructions */ HANDLE_OP(WASM_OP_I32_LOAD) diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index e19a648da3..1a1ec8b355 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7300,6 +7300,8 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_SET_GLOBAL: case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: + case WASM_OP_GET_GLOBAL_128: + case WASM_OP_SET_GLOBAL_128: case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ break; @@ -9111,6 +9113,11 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->preserved_local_offset++; emit_label(EXT_OP_COPY_STACK_TOP); } + else if (local_type == VALUE_TYPE_V128) { + if (loader_ctx->p_code_compiled) + loader_ctx->preserved_local_offset += 4; + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } else { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 2; @@ -13206,9 +13213,14 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_GET_GLOBAL_64); } + + if (global_type == VALUE_TYPE_V128) { + skip_label(); + emit_label(WASM_OP_GET_GLOBAL_128); + } +#endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); -#endif /* end of WASM_ENABLE_FAST_INTERP */ break; } @@ -13300,6 +13312,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_SET_GLOBAL_AUX_STACK); } +#if WASM_ENABLE_SIMDE != 0 + else if (global_type == VALUE_TYPE_V128) { + skip_label(); + emit_label(WASM_OP_SET_GLOBAL_128); + } +#endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); POP_OFFSET_TYPE(global_type); #endif /* end of WASM_ENABLE_FAST_INTERP */ diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index c3c5e00f80..47036e0db4 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -284,6 +284,8 @@ typedef enum WASMOpcode { EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, EXT_OP_COPY_STACK_TOP_V128 = 0xdf, + WASM_OP_GET_GLOBAL_128 = 0xe0, + WASM_OP_SET_GLOBAL_128 = 0xe1, #endif /* Post-MVP extend op prefix */ @@ -803,7 +805,10 @@ typedef enum WASMAtomicEXTOpcode { #define DEF_EXT_V128_HANDLE() \ SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ - SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), \ + #else #define DEF_EXT_V128_HANDLE() #endif From 817a746eed48e14f95e38650d975964918cba1c3 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Fri, 24 Jan 2025 18:07:37 +0000 Subject: [PATCH 04/13] Fix incorrect memory overflow values + SIMD ifdefs --- core/iwasm/interpreter/wasm_interp_fast.c | 4 ++-- core/iwasm/interpreter/wasm_loader.c | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index d972fee233..3f107bdb21 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5909,7 +5909,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ + CHECK_MEMORY_OVERFLOW(4); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ @@ -5952,7 +5952,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, offset += base; addr = GET_OPERAND(uint32, I32, 0); - CHECK_MEMORY_OVERFLOW(32); + CHECK_MEMORY_OVERFLOW(4); STORE_V128(maddr, data); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 1a1ec8b355..a860b7dedb 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7300,8 +7300,10 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_SET_GLOBAL: case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: +#if WASM_ENABLE_SIMDE != 0 case WASM_OP_GET_GLOBAL_128: case WASM_OP_SET_GLOBAL_128: +#endif case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ break; @@ -9090,6 +9092,7 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, bool *preserved, char *error_buf, uint32 error_buf_size) { + uint32 i = 0; int16 preserved_offset = (int16)local_index; @@ -9113,11 +9116,13 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->preserved_local_offset++; emit_label(EXT_OP_COPY_STACK_TOP); } +#if WASM_ENABLE_SIMDE != 0 else if (local_type == VALUE_TYPE_V128) { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 4; emit_label(EXT_OP_COPY_STACK_TOP_V128); } +#endif else { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 2; @@ -9790,11 +9795,12 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, /* insert op_copy before else opcode */ if (opcode == WASM_OP_ELSE) skip_label(); - +#if WASM_ENABLE_SIMDE != 0 if (cell == 4) { emit_label(EXT_OP_COPY_STACK_TOP_V128); } - else { +#endif + if (cell <= 2) { emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP : EXT_OP_COPY_STACK_TOP_I64); } @@ -13115,10 +13121,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_TEE_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } +#if WASM_ENABLE_SIMDE != 0 else if (local_type == VALUE_TYPE_V128) { emit_label(EXT_OP_TEE_LOCAL_FAST_V128); emit_byte(loader_ctx, (uint8)local_offset); } +#endif else { emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); @@ -13213,11 +13221,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_GET_GLOBAL_64); } - +#if WASM_ENABLE_SIMDE != 0 if (global_type == VALUE_TYPE_V128) { skip_label(); emit_label(WASM_OP_GET_GLOBAL_128); } +#endif #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); From afbebaddc4597056da0bba2060f7307ff52c170f Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 22 Jan 2025 10:09:54 +0000 Subject: [PATCH 05/13] Fix load/load_splat macros --- core/iwasm/interpreter/wasm_interp_fast.c | 74 +++++++++-------------- core/iwasm/interpreter/wasm_loader.c | 12 ++-- 2 files changed, 36 insertions(+), 50 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 3f107bdb21..384bc0ad37 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3604,7 +3604,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, global_addr = get_global_addr(global_data, global); addr_ret = GET_OFFSET(); PUT_V128_TO_ADDR(frame_lp + addr_ret, - GET_V128_FROM_ADDR((uint32 *)global_addr)); + GET_V128_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } #endif @@ -3683,7 +3683,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, global_addr = get_global_addr(global_data, global); addr1 = GET_OFFSET(); PUT_V128_TO_ADDR((uint32 *)global_addr, - GET_V128_FROM_ADDR(frame_lp + addr1)); + GET_V128_FROM_ADDR(frame_lp + addr1)); HANDLE_OP_END(); } #endif @@ -5843,66 +5843,54 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); break; } -#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ - do { \ - uint32 offset, addr; \ - offset = read_uint32(frame_ip); \ - addr = GET_OPERAND(uint32, I32, 0); \ - frame_ip += 2; \ - addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ - \ - simde_v128_t simde_result = simde_func(maddr); \ - \ - V128 result; \ - SIMDE_V128_TO_SIMD_V128(simde_result, result); \ - \ - V128 reversed_result; \ - for (int i = 0; i < num_elements; i++) { \ - reversed_result.i##element_size##x##num_elements[i] = \ - result.i##element_size##x##num_elements[num_elements - 1 - i]; \ - } \ - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); \ - \ - break; \ +#define SIMD_LOAD_OP(simde_func, element_size, num_elements) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(4); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + \ + break; \ } while (0) case SIMD_v128_load8x8_s: { - SIMD_LOAD_OP(SIMD_v128_load8x8_s, - simde_wasm_i16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_i16x8_load8x8, 16, 8); break; } case SIMD_v128_load8x8_u: { - SIMD_LOAD_OP(SIMD_v128_load8x8_u, - simde_wasm_u16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_u16x8_load8x8, 16, 8); break; } case SIMD_v128_load16x4_s: { - SIMD_LOAD_OP(SIMD_v128_load16x4_s, - simde_wasm_i32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_i32x4_load16x4, 32, 4); break; } case SIMD_v128_load16x4_u: { - SIMD_LOAD_OP(SIMD_v128_load16x4_u, - simde_wasm_u32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_u32x4_load16x4, 32, 4); break; } case SIMD_v128_load32x2_s: { - SIMD_LOAD_OP(SIMD_v128_load32x2_s, - simde_wasm_i64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_i64x2_load32x2, 64, 2); break; } case SIMD_v128_load32x2_u: { - SIMD_LOAD_OP(SIMD_v128_load32x2_u, - simde_wasm_u64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_u64x2_load32x2, 64, 2); break; } -#define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \ +#define SIMD_LOAD_SPLAT_OP(simde_func) \ do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ @@ -5921,26 +5909,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_load8_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat, - simde_wasm_v128_load8_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load8_splat); break; } case SIMD_v128_load16_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat, - simde_wasm_v128_load16_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load16_splat); break; } case SIMD_v128_load32_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat, - simde_wasm_v128_load32_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load32_splat); break; } case SIMD_v128_load64_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat, - simde_wasm_v128_load64_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load64_splat); break; } case SIMD_v128_store: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a860b7dedb..1eb13d7dd2 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -9135,13 +9135,15 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->frame_offset_bottom[i] = preserved_offset; } - if (is_32bit_type(cur_type)) - i++; - else if (cur_type == VALUE_TYPE_V128) { + if (cur_type == VALUE_TYPE_V128) { i += 4; } - else + else if (is_32bit_type(cur_type)) { + i++; + } + else { i += 2; + } } (void)error_buf; @@ -13310,7 +13312,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, func->has_op_set_global_aux_stack = true; #endif } -#else /* else of WASM_ENABLE_FAST_INTERP */ +#else /* else of WASM_ENABLE_FAST_INTERP */ if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { skip_label(); From 2b1875029ba7fd827e0114b4e631351ff4a88f42 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 27 Jan 2025 15:57:58 +0000 Subject: [PATCH 06/13] formatting commit --- core/iwasm/interpreter/wasm_interp_fast.c | 6 +++--- core/iwasm/interpreter/wasm_loader.c | 2 +- core/iwasm/interpreter/wasm_opcode.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 384bc0ad37..c8f00aed92 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -47,7 +47,7 @@ typedef float64 CellType_F64; && (app_addr) <= shared_heap_end_off - bytes + 1) #define shared_heap_addr_app_to_native(app_addr, native_addr) \ - native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off) + native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off) #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \ if (app_addr_in_shared_heap(app_addr, bytes)) \ @@ -1793,7 +1793,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, else cur_func_type = cur_func->u.func->func_type; - /* clang-format off */ + /* clang-format off */ #if WASM_ENABLE_GC == 0 if (cur_type != cur_func_type) { wasm_set_exception(module, "indirect call type mismatch"); @@ -5897,7 +5897,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(4); \ + CHECK_MEMORY_OVERFLOW(4); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 1eb13d7dd2..efd95f426f 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -13217,7 +13217,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #endif *p_org = WASM_OP_GET_GLOBAL_64; } -#else /* else of WASM_ENABLE_FAST_INTERP */ +#else /* else of WASM_ENABLE_FAST_INTERP */ if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { skip_label(); diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 47036e0db4..0ddf8153ac 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -807,7 +807,7 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), #else #define DEF_EXT_V128_HANDLE() From 50e1972035fb3ba036c9a90f6df1b2de96c0a4ac Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 27 Jan 2025 16:03:59 +0000 Subject: [PATCH 07/13] correct endif wasm loader --- core/iwasm/interpreter/wasm_interp_fast.c | 1 - core/iwasm/interpreter/wasm_loader.c | 2 +- core/iwasm/interpreter/wasm_opcode.h | 9 +++------ core/iwasm/libraries/simde/simde.cmake | 7 +++++-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index c8f00aed92..ee1ba73c6f 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -7019,7 +7019,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_add: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_add); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index efd95f426f..4e2d6f3378 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -13228,10 +13228,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_GET_GLOBAL_128); } -#endif #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); +#endif /* end of WASM_ENABLE_FAST_INTERP */ break; } diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 0ddf8153ac..a8118ead0e 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,8 +278,7 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ && WASM_ENABLE_SIMD != 0 EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, @@ -791,8 +790,7 @@ typedef enum WASMAtomicEXTOpcode { #endif #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), @@ -800,8 +798,7 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif -#if (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0) \ - && WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 #define DEF_EXT_V128_HANDLE() \ SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index b36e356945..1219c8e5b1 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -4,9 +4,12 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") +if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*") add_definitions (-DWASM_ENABLE_SIMDE=1) -endif () +else() + message(WARNING "Disabling SIMD for fast interpreter as the target is not supported") + set(WAMR_BUILD_SIMD 0) +endif() include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) From 413c493b7e58a9220cba93338fe28c7265852e0f Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:02:41 +0000 Subject: [PATCH 08/13] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index a8118ead0e..55c94b031b 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -798,7 +798,7 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif -#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 #define DEF_EXT_V128_HANDLE() \ SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ From a5b987255e083973d697a9b2a89f88c95b1e2938 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:02:48 +0000 Subject: [PATCH 09/13] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 55c94b031b..ceb75f26a3 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -790,7 +790,7 @@ typedef enum WASMAtomicEXTOpcode { #endif #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ +#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), From d4c3f04d0ae1eedc64778edc5500e18a5e7da68a Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:02:57 +0000 Subject: [PATCH 10/13] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index ceb75f26a3..c8cdd0c2ff 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -803,8 +803,8 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), #else #define DEF_EXT_V128_HANDLE() From fad514e65ede8159bf1b208020e23eeea5fd542d Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:03:24 +0000 Subject: [PATCH 11/13] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index c8cdd0c2ff..176515c570 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,7 +278,7 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif -#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ +#if WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0 \ && WASM_ENABLE_SIMD != 0 EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, From 7fe0f5a9ae2ea0dd1a4cdaac7b3181e106852de7 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 16:06:15 +0000 Subject: [PATCH 12/13] Fixes --- build-scripts/config_common.cmake | 14 ++++++++++---- build-scripts/runtime_lib.cmake | 10 ++++++++-- core/iwasm/interpreter/wasm_interp_fast.c | 18 +++++++++--------- core/iwasm/interpreter/wasm_loader.c | 9 +++++---- core/iwasm/interpreter/wasm_opcode.h | 22 +++++++++++----------- core/iwasm/libraries/simde/simde.cmake | 7 +------ 6 files changed, 44 insertions(+), 36 deletions(-) diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index b6503d808d..3db29e848c 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -320,12 +320,18 @@ else () message (" Wakeup of blocking operations enabled") endif () if (WAMR_BUILD_SIMD EQUAL 1) - if (NOT WAMR_BUILD_TARGET MATCHES "RISCV64.*") - add_definitions (-DWASM_ENABLE_SIMD=1) - message (" SIMD enabled") - else () + set(SIMD_ENABLED 0) + if (WAMR_BUILD_TARGET MATCHES "RISCV64.*") + set(WAMR_BUILD_SIMD 0) message (" SIMD disabled due to not supported on target RISCV64") + elseif (WAMR_BUILD_FAST_INTERP EQUAL 1 AND WAMR_BUILD_SIMDE EQUAL 0) + set(WAMR_BUILD_SIMD 0) + message(" SIMD disabled as the simde is not built in fast interpreter mode") + else() + set(SIMD_ENABLED 1) + message (" SIMD enabled") endif () + add_definitions(-DWASM_ENABLE_SIMD=${SIMD_ENABLED}) endif () if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1) add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1) diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 29789d671c..ec3a370d61 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -155,8 +155,14 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () -if (WAMR_BUILD_LIB_SIMDE EQUAL 1) - include (${IWASM_DIR}/libraries/simde/simde.cmake) +if (WAMR_BUILD_SIMD EQUAL 1 AND WAMR_BUILD_FAST_INTERP EQUAL 1) + if (NOT (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*")) + message(STATUS "SIMDe doesnt support platform " ${WAMR_BUILD_TARGET}) + set(WAMR_BUILD_SIMDE 0) + else() + include (${IWASM_DIR}/libraries/simde/simde.cmake) + set (WAMR_BUILD_SIMDE 1) + endif() endif () if (WAMR_BUILD_WASM_CACHE EQUAL 1) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index ee1ba73c6f..5aee64b4df 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3541,7 +3541,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } -#if WASM_ENABLE_SIMDE != 0 +#if WASM_ENABLE_SIMD != 0 HANDLE_OP(EXT_OP_SET_LOCAL_FAST_V128) HANDLE_OP(EXT_OP_TEE_LOCAL_FAST_V128) { @@ -3595,8 +3595,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } -#if WASM_ENABLE_SIMDE != 0 - HANDLE_OP(WASM_OP_GET_GLOBAL_128) +#if WASM_ENABLE_SIMD != 0 + HANDLE_OP(WASM_OP_GET_GLOBAL_V128) { global_idx = read_uint32(frame_ip); bh_assert(global_idx < module->e->global_count); @@ -3675,7 +3675,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } #if WASM_ENABLE_SIMDE != 0 - HANDLE_OP(WASM_OP_SET_GLOBAL_128) + HANDLE_OP(WASM_OP_SET_GLOBAL_V128) { global_idx = read_uint32(frame_ip); bh_assert(global_idx < module->e->global_count); @@ -4932,7 +4932,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } -#if WASM_ENABLE_SIMDE != 0 +#if WASM_ENABLE_SIMD != 0 HANDLE_OP(EXT_OP_COPY_STACK_TOP_V128) { addr1 = GET_OFFSET(); @@ -5837,7 +5837,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { uint32 offset, addr; offset = read_uint32(frame_ip); - addr = POP_I32(); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; addr_ret = GET_OFFSET(); CHECK_MEMORY_OVERFLOW(16); PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); @@ -5850,7 +5851,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(4); \ + CHECK_MEMORY_OVERFLOW(16); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ @@ -5858,7 +5859,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMDE_V128_TO_SIMD_V128(simde_result, result); \ PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ \ - break; \ } while (0) case SIMD_v128_load8x8_s: { @@ -5936,7 +5936,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, offset += base; addr = GET_OPERAND(uint32, I32, 0); - CHECK_MEMORY_OVERFLOW(4); + CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 4e2d6f3378..eaba5555b0 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7301,8 +7301,8 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: #if WASM_ENABLE_SIMDE != 0 - case WASM_OP_GET_GLOBAL_128: - case WASM_OP_SET_GLOBAL_128: + case WASM_OP_GET_GLOBAL_V128: + case WASM_OP_SET_GLOBAL_V128: #endif case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ @@ -9588,6 +9588,7 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, sizeof(V128)); ctx->const_cell_num++; + break; #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_FUNCREF: @@ -13226,7 +13227,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_SIMDE != 0 if (global_type == VALUE_TYPE_V128) { skip_label(); - emit_label(WASM_OP_GET_GLOBAL_128); + emit_label(WASM_OP_GET_GLOBAL_V128); } #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); @@ -13326,7 +13327,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_SIMDE != 0 else if (global_type == VALUE_TYPE_V128) { skip_label(); - emit_label(WASM_OP_SET_GLOBAL_128); + emit_label(WASM_OP_SET_GLOBAL_V128); } #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 176515c570..9660bb1236 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,13 +278,13 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif -#if WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0 \ - && WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_JIT != 0 \ + || WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, EXT_OP_COPY_STACK_TOP_V128 = 0xdf, - WASM_OP_GET_GLOBAL_128 = 0xe0, - WASM_OP_SET_GLOBAL_128 = 0xe1, + WASM_OP_GET_GLOBAL_V128 = 0xe0, + WASM_OP_SET_GLOBAL_V128 = 0xe1, #endif /* Post-MVP extend op prefix */ @@ -798,13 +798,13 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif -#if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 -#define DEF_EXT_V128_HANDLE() \ - SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ - SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ - SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), +#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 +#define DEF_EXT_V128_HANDLE() \ + SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), /* 0xdd */ \ + SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), /* 0xde */ \ + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), /* 0xdf */ \ + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), /* 0xe0 */ \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), /* 0xe1 */ #else #define DEF_EXT_V128_HANDLE() diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index 1219c8e5b1..eeb0e8d1f2 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -4,12 +4,7 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*") - add_definitions (-DWASM_ENABLE_SIMDE=1) -else() - message(WARNING "Disabling SIMD for fast interpreter as the target is not supported") - set(WAMR_BUILD_SIMD 0) -endif() +add_definitions (-DWASM_ENABLE_SIMDE=1) include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) From 993ff54ac65968e0e5b1bb0d815ab317e24a5495 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Fri, 24 Jan 2025 12:27:33 +0000 Subject: [PATCH 13/13] Fix load/store Fix v128 load/store style --- core/iwasm/interpreter/wasm_interp_fast.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 5aee64b4df..6dc9351e03 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5837,8 +5837,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { uint32 offset, addr; offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; + addr = POP_I32(); addr_ret = GET_OFFSET(); CHECK_MEMORY_OVERFLOW(16); PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); @@ -5932,9 +5931,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; offset = read_uint32(frame_ip); V128 data = POP_V128(); - int32 base = POP_I32(); - offset += base; - addr = GET_OPERAND(uint32, I32, 0); + addr = POP_I32(); + + V128 data; + data = POP_V128(); CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data);