Skip to content

Commit 67dce48

Browse files
authored
Enable merged os_mmap for aot data sections and aot text (#3743)
Enable a merged os_mmap for the AOT data sections first, and then try to enable a merged os_mmap for the data sections together with the AOT text, except on the NuttX and ESP-IDF platforms. This fixes the issue where an aarch64 AOT module fails to load on Android (#2274). It also refines the os_mmap-related code.
1 parent 1329e1d commit 67dce48

File tree

2 files changed

+178
-72
lines changed

2 files changed

+178
-72
lines changed

core/iwasm/aot/aot_loader.c

Lines changed: 171 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,39 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
294294
return mem;
295295
}
296296

297+
static void *
298+
loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size)
299+
{
300+
int map_prot =
301+
MMAP_PROT_READ | MMAP_PROT_WRITE | (prot_exec ? MMAP_PROT_EXEC : 0);
302+
int map_flags;
303+
void *mem;
304+
305+
#if UINTPTR_MAX == UINT64_MAX
306+
/* The mmapped AOT data and code in 64-bit targets had better be in
307+
range 0 to 2G, or aot loader may fail to apply some relocations,
308+
e.g., R_X86_64_32/R_X86_64_32S/R_X86_64_PC32/R_RISCV_32.
309+
We try to mmap with MMAP_MAP_32BIT flag first, and if fails, mmap
310+
again without the flag. */
311+
map_flags = MMAP_MAP_32BIT;
312+
if ((mem = os_mmap(NULL, size, map_prot, map_flags,
313+
os_get_invalid_handle()))) {
314+
/* The mmapped memory must be in the first 2 Gigabytes of the
315+
process address space */
316+
bh_assert((uintptr_t)mem < INT32_MAX);
317+
return mem;
318+
}
319+
#endif
320+
321+
map_flags = MMAP_MAP_NONE;
322+
if (!(mem = os_mmap(NULL, size, map_prot, map_flags,
323+
os_get_invalid_handle()))) {
324+
set_error_buf(error_buf, error_buf_size, "allocate memory failed");
325+
return NULL;
326+
}
327+
return mem;
328+
}
329+
297330
static char *
298331
load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
299332
bool is_load_from_file_buf,
@@ -2378,7 +2411,6 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections,
23782411
}
23792412
}
23802413
#endif
2381-
os_munmap(data_section->data, data_section->size);
23822414
}
23832415
wasm_runtime_free(data_sections);
23842416
}
@@ -2392,6 +2424,9 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
23922424
AOTObjectDataSection *data_sections;
23932425
uint64 size;
23942426
uint32 i;
2427+
uint64 total_size = 0;
2428+
uint32 page_size = os_getpagesize();
2429+
uint8 *merged_sections = NULL;
23952430

23962431
/* Allocate memory */
23972432
size = sizeof(AOTObjectDataSection) * (uint64)module->data_section_count;
@@ -2400,41 +2435,40 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
24002435
return false;
24012436
}
24022437

2403-
/* Create each data section */
2438+
/* First iteration: read data from buf, and calculate total memory needed */
24042439
for (i = 0; i < module->data_section_count; i++) {
2405-
int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
2406-
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
2407-
|| defined(BUILD_TARGET_RISCV64_LP64D) \
2408-
|| defined(BUILD_TARGET_RISCV64_LP64)
2409-
/* aot code and data in x86_64 must be in range 0 to 2G due to
2410-
relocation for R_X86_64_32/32S/PC32 */
2411-
int map_flags = MMAP_MAP_32BIT;
2412-
#else
2413-
int map_flags = MMAP_MAP_NONE;
2414-
#endif
2415-
24162440
read_string(buf, buf_end, data_sections[i].name);
24172441
read_uint32(buf, buf_end, data_sections[i].size);
2418-
2442+
CHECK_BUF(buf, buf_end, data_sections[i].size);
2443+
/* Temporary record data ptr for merge, will be replaced after the
2444+
merged_data_sections is mmapped */
2445+
if (data_sections[i].size > 0)
2446+
data_sections[i].data = (uint8 *)buf;
2447+
buf += data_sections[i].size;
2448+
total_size += align_uint64((uint64)data_sections[i].size, page_size);
2449+
}
2450+
if (total_size > UINT32_MAX) {
2451+
set_error_buf(error_buf, error_buf_size, "data sections too large");
2452+
return false;
2453+
}
2454+
if (total_size > 0) {
24192455
/* Allocate memory for data */
2420-
if (data_sections[i].size > 0
2421-
&& !(data_sections[i].data =
2422-
os_mmap(NULL, data_sections[i].size, map_prot, map_flags,
2423-
os_get_invalid_handle()))) {
2424-
set_error_buf(error_buf, error_buf_size, "allocate memory failed");
2456+
merged_sections = module->merged_data_sections =
2457+
loader_mmap((uint32)total_size, false, error_buf, error_buf_size);
2458+
if (!merged_sections) {
24252459
return false;
24262460
}
2427-
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
2428-
#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \
2429-
&& !defined(BH_PLATFORM_DARWIN)
2430-
/* address must be in the first 2 Gigabytes of
2431-
the process address space */
2432-
bh_assert((uintptr_t)data_sections[i].data < INT32_MAX);
2433-
#endif
2434-
#endif
2461+
module->merged_data_sections_size = (uint32)total_size;
2462+
}
24352463

2436-
read_byte_array(buf, buf_end, data_sections[i].data,
2437-
data_sections[i].size);
2464+
/* Second iteration: Create each data section */
2465+
for (i = 0; i < module->data_section_count; i++) {
2466+
if (data_sections[i].size > 0) {
2467+
bh_memcpy_s(merged_sections, data_sections[i].size,
2468+
data_sections[i].data, data_sections[i].size);
2469+
data_sections[i].data = merged_sections;
2470+
merged_sections += align_uint(data_sections[i].size, page_size);
2471+
}
24382472
}
24392473

24402474
*p_buf = buf;
@@ -2532,6 +2566,82 @@ load_init_data_section(const uint8 *buf, const uint8 *buf_end,
25322566
return false;
25332567
}
25342568

2569+
#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
2570+
static bool
2571+
try_merge_data_and_text(const uint8 **buf, const uint8 **buf_end,
2572+
AOTModule *module, char *error_buf,
2573+
uint32 error_buf_size)
2574+
{
2575+
uint8 *old_buf = (uint8 *)*buf;
2576+
uint8 *old_end = (uint8 *)*buf_end;
2577+
size_t code_size = (size_t)(old_end - old_buf);
2578+
uint32 page_size = os_getpagesize();
2579+
uint64 total_size = 0;
2580+
uint32 i;
2581+
uint8 *sections;
2582+
2583+
if (code_size == 0) {
2584+
return true;
2585+
}
2586+
2587+
/* calculate the total memory needed */
2588+
total_size += align_uint64((uint64)code_size, page_size);
2589+
for (i = 0; i < module->data_section_count; ++i) {
2590+
total_size +=
2591+
align_uint64((uint64)module->data_sections[i].size, page_size);
2592+
}
2593+
/* distance between .data and .text should not be greater than 4GB
2594+
for some targets (e.g. arm64 reloc need < 4G distance) */
2595+
if (total_size > UINT32_MAX) {
2596+
return false;
2597+
}
2598+
/* code_size was checked and must be larger than 0 here */
2599+
bh_assert(total_size > 0);
2600+
2601+
sections = loader_mmap((uint32)total_size, false, NULL, 0);
2602+
if (!sections) {
2603+
/* merge failed but may be not critical for some targets */
2604+
return false;
2605+
}
2606+
/* change the code part to be executable */
2607+
if (os_mprotect(sections, code_size,
2608+
MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC)
2609+
!= 0) {
2610+
os_munmap(sections, (uint32)total_size);
2611+
return false;
2612+
}
2613+
2614+
module->merged_data_text_sections = sections;
2615+
module->merged_data_text_sections_size = (uint32)total_size;
2616+
2617+
/* order not essential just as compiler does: .text section first */
2618+
*buf = sections;
2619+
*buf_end = sections + code_size;
2620+
bh_memcpy_s(sections, code_size, old_buf, code_size);
2621+
os_munmap(old_buf, code_size);
2622+
sections += align_uint((uint32)code_size, page_size);
2623+
2624+
/* then migrate .data sections */
2625+
for (i = 0; i < module->data_section_count; ++i) {
2626+
AOTObjectDataSection *data_section = module->data_sections + i;
2627+
uint8 *old_data = data_section->data;
2628+
data_section->data = sections;
2629+
bh_memcpy_s(data_section->data, data_section->size, old_data,
2630+
data_section->size);
2631+
sections += align_uint(data_section->size, page_size);
2632+
}
2633+
/* free the original data sections */
2634+
if (module->merged_data_sections) {
2635+
os_munmap(module->merged_data_sections,
2636+
module->merged_data_sections_size);
2637+
module->merged_data_sections = NULL;
2638+
module->merged_data_sections_size = 0;
2639+
}
2640+
2641+
return true;
2642+
}
2643+
#endif /* ! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) */
2644+
25352645
static bool
25362646
load_text_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
25372647
char *error_buf, uint32 error_buf_size)
@@ -3391,16 +3501,9 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
33913501
+ sizeof(uint64) * module->real_plt_count
33923502
+ sizeof(uint32) * module->float_plt_count;
33933503
if (size > 0) {
3394-
map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
3395-
/* aot code and data in x86_64 must be in range 0 to 2G due to
3396-
relocation for R_X86_64_32/32S/PC32 */
3397-
map_flags = MMAP_MAP_32BIT;
3398-
33993504
if (size > UINT32_MAX
3400-
|| !(module->extra_plt_data =
3401-
os_mmap(NULL, (uint32)size, map_prot, map_flags,
3402-
os_get_invalid_handle()))) {
3403-
set_error_buf(error_buf, error_buf_size, "mmap memory failed");
3505+
|| !(module->extra_plt_data = loader_mmap(
3506+
(uint32)size, true, error_buf, error_buf_size))) {
34043507
goto fail;
34053508
}
34063509
module->extra_plt_data_size = (uint32)size;
@@ -3512,19 +3615,12 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
35123615
GOTItem *got_item = module->got_item_list;
35133616
uint32 got_item_idx = 0;
35143617

3515-
map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
3516-
/* aot code and data in x86_64 must be in range 0 to 2G due to
3517-
relocation for R_X86_64_32/32S/PC32 */
3518-
map_flags = MMAP_MAP_32BIT;
3519-
35203618
/* Create the GOT for func_ptrs, note that it is different from
35213619
the .got section of a dynamic object file */
35223620
size = (uint64)sizeof(void *) * got_item_count;
35233621
if (size > UINT32_MAX
3524-
|| !(module->got_func_ptrs =
3525-
os_mmap(NULL, (uint32)size, map_prot, map_flags,
3526-
os_get_invalid_handle()))) {
3527-
set_error_buf(error_buf, error_buf_size, "mmap memory failed");
3622+
|| !(module->got_func_ptrs = loader_mmap(
3623+
(uint32)size, false, error_buf, error_buf_size))) {
35283624
goto fail;
35293625
}
35303626

@@ -3749,6 +3845,17 @@ load_from_sections(AOTModule *module, AOTSection *sections,
37493845
return false;
37503846
break;
37513847
case AOT_SECTION_TYPE_TEXT:
3848+
#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
3849+
/* try to merge .data and .text, with exceptions:
3850+
* 1. XIP mode
3851+
* 2. pre-mmapped module load from aot_load_from_sections()
3852+
* 3. nuttx & esp-idf: have separate region for MMAP_PROT_EXEC
3853+
*/
3854+
if (!module->is_indirect_mode && is_load_from_file_buf)
3855+
if (!try_merge_data_and_text(&buf, &buf_end, module,
3856+
error_buf, error_buf_size))
3857+
LOG_WARNING("merge .data and .text sections failed");
3858+
#endif /* ! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) */
37523859
if (!load_text_section(buf, buf_end, module, error_buf,
37533860
error_buf_size))
37543861
return false;
@@ -4065,37 +4172,16 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size,
40654172

40664173
if (section_type == AOT_SECTION_TYPE_TEXT) {
40674174
if ((section_size > 0) && !module->is_indirect_mode) {
4068-
int map_prot =
4069-
MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
4070-
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
4071-
|| defined(BUILD_TARGET_RISCV64_LP64D) \
4072-
|| defined(BUILD_TARGET_RISCV64_LP64)
4073-
/* aot code and data in x86_64 must be in range 0 to 2G due
4074-
to relocation for R_X86_64_32/32S/PC32 */
4075-
int map_flags = MMAP_MAP_32BIT;
4076-
#else
4077-
int map_flags = MMAP_MAP_NONE;
4078-
#endif
40794175
total_size =
40804176
(uint64)section_size + aot_get_plt_table_size();
40814177
total_size = (total_size + 3) & ~((uint64)3);
40824178
if (total_size >= UINT32_MAX
40834179
|| !(aot_text =
4084-
os_mmap(NULL, (uint32)total_size, map_prot,
4085-
map_flags, os_get_invalid_handle()))) {
4180+
loader_mmap((uint32)total_size, true,
4181+
error_buf, error_buf_size))) {
40864182
wasm_runtime_free(section);
4087-
set_error_buf(error_buf, error_buf_size,
4088-
"mmap memory failed");
40894183
goto fail;
40904184
}
4091-
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
4092-
#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \
4093-
&& !defined(BH_PLATFORM_DARWIN)
4094-
/* address must be in the first 2 Gigabytes of
4095-
the process address space */
4096-
bh_assert((uintptr_t)aot_text < INT32_MAX);
4097-
#endif
4098-
#endif
40994185

41004186
#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
41014187
mirrored_text = os_get_dbus_mirror(aot_text);
@@ -4179,7 +4265,11 @@ load(const uint8 *buf, uint32 size, AOTModule *module,
41794265
if (!ret) {
41804266
/* If load_from_sections() fails, then aot text is destroyed
41814267
in destroy_sections() */
4182-
destroy_sections(section_list, module->is_indirect_mode ? false : true);
4268+
destroy_sections(section_list,
4269+
module->is_indirect_mode
4270+
|| module->merged_data_text_sections
4271+
? false
4272+
: true);
41834273
/* aot_unload() won't destroy aot text again */
41844274
module->code = NULL;
41854275
}
@@ -4329,7 +4419,8 @@ aot_unload(AOTModule *module)
43294419
}
43304420
#endif
43314421

4332-
if (module->code && !module->is_indirect_mode) {
4422+
if (module->code && !module->is_indirect_mode
4423+
&& !module->merged_data_text_sections) {
43334424
/* The layout is: literal size + literal + code (with plt table) */
43344425
uint8 *mmap_addr = module->literal - sizeof(uint32);
43354426
uint32 total_size =
@@ -4364,6 +4455,14 @@ aot_unload(AOTModule *module)
43644455
destroy_object_data_sections(module->data_sections,
43654456
module->data_section_count);
43664457

4458+
if (module->merged_data_sections)
4459+
os_munmap(module->merged_data_sections,
4460+
module->merged_data_sections_size);
4461+
4462+
if (module->merged_data_text_sections)
4463+
os_munmap(module->merged_data_text_sections,
4464+
module->merged_data_text_sections_size);
4465+
43674466
#if WASM_ENABLE_DEBUG_AOT != 0
43684467
jit_code_entry_destroy(module->elf_hdr);
43694468
#endif

core/iwasm/aot/aot_runtime.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,13 @@ typedef struct AOTModule {
315315

316316
/* Whether the underlying wasm binary buffer can be freed */
317317
bool is_binary_freeable;
318+
319+
/* `.data` sections merged into one mmaped to reduce the tlb cache miss */
320+
uint8 *merged_data_sections;
321+
uint32 merged_data_sections_size;
322+
/* `.data` and `.text` sections merged into one large mmaped section */
323+
uint8 *merged_data_text_sections;
324+
uint32 merged_data_text_sections_size;
318325
} AOTModule;
319326

320327
#define AOTMemoryInstance WASMMemoryInstance

0 commit comments

Comments
 (0)