Skip to content

Commit 3416bb5

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into abi/submission_info
2 parents 8e014e5 + 7bdfb08 commit 3416bb5

File tree

86 files changed

+2818
-1357
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+2818
-1357
lines changed

.github/workflows/sycl-ur-perf-benchmarking.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ on:
2424
- Full
2525
- SYCL
2626
- Minimal
27+
- Core
2728
- Normal
2829
- Test
2930
- Gromacs

clang/test/Driver/sycl-offload-new-driver.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@
237237

238238
// Verify for multiple targets with -Xsycl-target-backend= with commas in the values
239239
// are passed correctly to llvm-offload-binary.
240-
// RUN: %clangxx -fsycl -### --offload-new-driver \
240+
// RUN: %clangxx -fsycl -### --offload-new-driver -fno-sycl-libspirv \
241241
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa,spir64_gen \
242242
// RUN: -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908,gfx1010 \
243243
// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_86,sm_87,sm_89 \

devops/scripts/benchmarks/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ The available benchmarks options are:
115115
* `Full` (BenchDNN, Compute, Gromacs, llama, SYCL, Velocity and UMF benchmarks)
116116
* `SYCL` (Compute, llama, SYCL, Velocity)
117117
* `Minimal` (Compute)
118+
* `Core` (Compute: SubmitKernel)
118119
* `Normal` (BenchDNN, Compute, Gromacs, llama, Velocity)
119120
* `Gromacs` (Gromacs)
120121
* `OneDNN` (BenchDNN)

devops/scripts/benchmarks/benches/compute.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,41 @@ def createRrBench(variant_name: str, **kwargs):
353353
return benches
354354

355355

356+
class ComputeBenchCoreSuite(ComputeBench):
357+
"""
358+
A suite of core compute benchmark scenarios for quick runs.
359+
"""
360+
361+
def name(self) -> str:
362+
return "Compute Benchmarks Core"
363+
364+
def benchmarks(self) -> list[Benchmark]:
365+
core_benches = []
366+
submit_kernel_params = product(
367+
list(RUNTIMES),
368+
[0, 1], # in_order_queue
369+
[0, 1], # measure_completion
370+
[0, 1], # use_events
371+
)
372+
for (
373+
runtime,
374+
in_order_queue,
375+
measure_completion,
376+
use_events,
377+
) in submit_kernel_params:
378+
core_benches.append(
379+
SubmitKernel(
380+
self,
381+
runtime,
382+
in_order_queue,
383+
measure_completion,
384+
use_events,
385+
KernelExecTime=1,
386+
)
387+
)
388+
return core_benches
389+
390+
356391
class ComputeBenchmark(Benchmark):
357392
def __init__(
358393
self,

devops/scripts/benchmarks/compare.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def print_regression(entry: dict, is_warning: bool = False):
411411
log_func(f"-- Delta: {entry['delta']}")
412412
log_func("")
413413
if args.produce_github_summary:
414-
gh_summary.append(f"#### {entry['name']}:")
414+
gh_summary.append(f"##### {entry['name']}:")
415415
gh_summary.append(
416416
f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
417417
)
@@ -427,12 +427,16 @@ def print_regression(entry: dict, is_warning: bool = False):
427427
)
428428
gh_summary.append("")
429429

430+
if args.produce_github_summary:
431+
gh_summary.append("")
432+
gh_summary.append("### Regressions and Improvements")
433+
430434
if improvements:
431435
log.info("#")
432436
log.info("# Improvements:")
433437
log.info("#")
434438
if args.produce_github_summary:
435-
gh_summary.append(f"### Improvements")
439+
gh_summary.append(f"#### Improvements")
436440
gh_summary.append(
437441
f"<details><summary>{len(improvements)} improved tests:</summary>"
438442
)
@@ -444,12 +448,16 @@ def print_regression(entry: dict, is_warning: bool = False):
444448
gh_summary.append("")
445449
if regressions_ignored:
446450
log.info("#")
447-
log.info("# Regressions (filtered out by --regression-filter):")
451+
log.info(
452+
f"# Regressions Ignored (filtered out by --regression-filter: {filter_type_capitalized})"
453+
)
448454
log.info("#")
449455
if args.produce_github_summary:
450-
gh_summary.append(f"### Non-{filter_type_capitalized} Regressions")
451456
gh_summary.append(
452-
f"<details><summary>{len(regressions_ignored)} non-{args.regression_filter_type} regressions:</summary>"
457+
f"#### Regressions Ignored (filtered out by --regression-filter: {filter_type_capitalized})"
458+
)
459+
gh_summary.append(
460+
f"<details><summary>{len(regressions_ignored)} non-'{args.regression_filter_type}' regressions:</summary>"
453461
)
454462
gh_summary.append("")
455463
for test in regressions_ignored:
@@ -462,7 +470,7 @@ def print_regression(entry: dict, is_warning: bool = False):
462470
log.warning("# Regressions:")
463471
log.warning("#")
464472
if args.produce_github_summary:
465-
gh_summary.append(f"### {filter_type_capitalized} Regressions")
473+
gh_summary.append(f"#### {filter_type_capitalized} Regressions")
466474
gh_summary.append(
467475
f"{len(regressions_of_concern)} {args.regression_filter_type} regressions. These regressions warrant a CI failure:"
468476
)
@@ -480,8 +488,6 @@ def print_regression(entry: dict, is_warning: bool = False):
480488

481489
log.info("No unexpected regressions found!")
482490
if args.produce_github_summary:
483-
gh_summary.append("")
484-
gh_summary.append("### Regressions")
485491
gh_summary.append("No unexpected regressions found!")
486492
with open(options.github_summary_regression_filename, "w") as f:
487493
f.write("\n".join(gh_summary))

devops/scripts/benchmarks/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ def main(directory, additional_env_vars, compare_names, filter, execution_stats)
269269

270270
suites = [
271271
ComputeBench(),
272+
ComputeBenchCoreSuite(),
272273
VelocityBench(),
273274
SyclBench(),
274275
LlamaCppBench(),

devops/scripts/benchmarks/presets.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
"Minimal": [
2727
"Compute Benchmarks",
2828
],
29+
"Core": [
30+
"Compute Benchmarks Core",
31+
],
2932
"Normal": [
3033
"BenchDNN",
3134
"Compute Benchmarks",

llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp

Lines changed: 44 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,21 @@ using namespace llvm;
2727

2828
namespace {
2929

30-
constexpr StringRef SYCL_HOST_ACCESS_ATTR = "sycl-host-access";
31-
constexpr StringRef SYCL_PIPELINED_ATTR = "sycl-pipelined";
32-
constexpr StringRef SYCL_REGISTER_ALLOC_MODE_ATTR = "sycl-register-alloc-mode";
33-
constexpr StringRef SYCL_GRF_SIZE_ATTR = "sycl-grf-size";
30+
constexpr StringRef SyclHostAccessAttr = "sycl-host-access";
31+
constexpr StringRef SyclPipelinedAttr = "sycl-pipelined";
32+
constexpr StringRef SyclRegisterAllocModeAttr = "sycl-register-alloc-mode";
33+
constexpr StringRef SyclGrfSizeAttr = "sycl-grf-size";
3434

35-
constexpr StringRef SPIRV_DECOR_MD_KIND = "spirv.Decorations";
36-
constexpr StringRef SPIRV_PARAM_DECOR_MD_KIND = "spirv.ParameterDecorations";
35+
constexpr StringRef SpirvDecorMdKind = "spirv.Decorations";
36+
constexpr StringRef SpirvParamDecorMdKind = "spirv.ParameterDecorations";
3737
// The corresponding SPIR-V OpCode for the host_access property is documented
3838
// in the SPV_INTEL_global_variable_decorations design document:
3939
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/DeviceGlobal/SPV_INTEL_global_variable_decorations.asciidoc#decoration
40-
constexpr uint32_t SPIRV_HOST_ACCESS_DECOR = 6147;
41-
constexpr uint32_t SPIRV_HOST_ACCESS_DEFAULT_VALUE = 2; // Read/Write
40+
constexpr uint32_t SpirvHostAccessDecor = 6147;
41+
constexpr uint32_t SpirvHostAccessDefaultValue = 2; // Read/Write
4242

43-
constexpr uint32_t SPIRV_INITIATION_INTERVAL_DECOR = 5917;
44-
constexpr uint32_t SPIRV_PIPELINE_ENABLE_DECOR = 5919;
45-
46-
constexpr uint32_t SPIRV_CACHE_CONTROL_READ_DECOR = 6442;
47-
constexpr uint32_t SPIRV_CACHE_CONTROL_WRITE_DECOR = 6443;
43+
constexpr uint32_t SpirvInitiationIntervalDecor = 5917;
44+
constexpr uint32_t SpirvPipelineEnableDecor = 5919;
4845

4946
enum class DecorValueTy {
5047
uint32,
@@ -89,12 +86,12 @@ enum FloatControlMask {
8986
// These opcodes are specified in SPIRV specification (SPV_KHR_float_controls
9087
// and SPV_INTEL_float_controls2 extensions):
9188
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.pdf
92-
constexpr uint32_t SPIRV_ROUNDING_MODE_RTE = 4462; // RoundingModeRTE
93-
constexpr uint32_t SPIRV_ROUNDING_MODE_RTZ = 4463; // RoundingModeRTZ
94-
constexpr uint32_t SPIRV_ROUNDING_MODE_RTP_INTEL = 5620; // RoundingModeRTPINTEL
95-
constexpr uint32_t SPIRV_ROUNDING_MODE_RTN_INTEL = 5621; // RoundingModeRTNINTEL
96-
constexpr uint32_t SPIRV_DENORM_FLUSH_TO_ZERO = 4460; // DenormFlushToZero
97-
constexpr uint32_t SPIRV_DENORM_PRESERVE = 4459; // DenormPreserve
89+
constexpr uint32_t SpirvRoundingModeRte = 4462; // RoundingModeRTE
90+
constexpr uint32_t SpirvRoundingModeRtz = 4463; // RoundingModeRTZ
91+
constexpr uint32_t SpirvRoundingModeRtpIntel = 5620; // RoundingModeRTPINTEL
92+
constexpr uint32_t SpirvRoundingModeRtnIntel = 5621; // RoundingModeRTNINTEL
93+
constexpr uint32_t SpirvDenormFlushToZero = 4460; // DenormFlushToZero
94+
constexpr uint32_t SpirvDenormPreserve = 4459; // DenormPreserve
9895

9996
/// Builds a metadata node for a SPIR-V decoration (decoration code is
10097
/// \c uint32_t integers) with no value.
@@ -147,15 +144,15 @@ MDNode *buildSpirvDecorCacheProp(LLVMContext &Ctx, StringRef Name,
147144
uint32_t OpCode, uint32_t CacheMode,
148145
uint32_t CacheLevel) {
149146
// SPIR-V encodings of read control
150-
enum cache_control_read_type {
147+
enum CacheControlReadType {
151148
read_uncached = 0,
152149
read_cached = 1,
153150
read_streaming = 2,
154151
read_invalidate = 3,
155152
read_const_cached = 4
156153
};
157154
// SPIR-V encodings of write control
158-
enum cache_control_write_type {
155+
enum CacheControlWriteType {
159156
write_uncached = 0,
160157
write_through = 1,
161158
write_back = 2,
@@ -338,28 +335,28 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
338335
};
339336

340337
if (IsFPModeSet(RTE))
341-
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTE);
338+
AddFPControlMetadata(SpirvRoundingModeRte);
342339

343340
if (IsFPModeSet(RTP))
344-
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTP_INTEL);
341+
AddFPControlMetadata(SpirvRoundingModeRtpIntel);
345342

346343
if (IsFPModeSet(RTN))
347-
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTN_INTEL);
344+
AddFPControlMetadata(SpirvRoundingModeRtnIntel);
348345

349346
if (IsFPModeSet(RTZ))
350-
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTZ);
347+
AddFPControlMetadata(SpirvRoundingModeRtz);
351348

352349
if (IsFPModeSet(DENORM_FTZ))
353-
AddFPControlMetadata(SPIRV_DENORM_FLUSH_TO_ZERO);
350+
AddFPControlMetadata(SpirvDenormFlushToZero);
354351

355352
if (IsFPModeSet(DENORM_HF_ALLOW))
356-
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 16);
353+
AddFPControlMetadataForWidth(SpirvDenormPreserve, 16);
357354

358355
if (IsFPModeSet(DENORM_F_ALLOW))
359-
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 32);
356+
AddFPControlMetadataForWidth(SpirvDenormPreserve, 32);
360357

361358
if (IsFPModeSet(DENORM_D_ALLOW))
362-
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64);
359+
AddFPControlMetadataForWidth(SpirvDenormPreserve, 64);
363360
}
364361

365362
static constexpr std::tuple<const char *, const char *> SimpleWGAttrs[] = {
@@ -483,12 +480,12 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
483480
MDNode::get(Ctx, ClusterMDArgs));
484481
}
485482

486-
if ((AttrKindStr == SYCL_REGISTER_ALLOC_MODE_ATTR ||
487-
AttrKindStr == SYCL_GRF_SIZE_ATTR) &&
483+
if ((AttrKindStr == SyclRegisterAllocModeAttr ||
484+
AttrKindStr == SyclGrfSizeAttr) &&
488485
!llvm::esimd::isESIMD(F)) {
489486
// TODO: Remove SyclRegisterAllocModeAttr support in next ABI break.
490487
uint32_t PropVal = getAttributeAsInteger<uint32_t>(Attr);
491-
if (AttrKindStr == SYCL_GRF_SIZE_ATTR) {
488+
if (AttrKindStr == SyclGrfSizeAttr) {
492489
// The RegisterAllocMode metadata supports only 0, 128, and 256 for
493490
// PropVal.
494491
if (PropVal != 0 && PropVal != 128 && PropVal != 256)
@@ -570,9 +567,9 @@ void getUserListIgnoringCast(
570567
PreservedAnalyses CompileTimePropertiesPass::run(Module &M,
571568
ModuleAnalysisManager &MAM) {
572569
LLVMContext &Ctx = M.getContext();
573-
unsigned MDKindID = Ctx.getMDKindID(SPIRV_DECOR_MD_KIND);
570+
unsigned MDKindID = Ctx.getMDKindID(SpirvDecorMdKind);
574571
bool CompileTimePropertiesMet = false;
575-
unsigned MDParamKindID = Ctx.getMDKindID(SPIRV_PARAM_DECOR_MD_KIND);
572+
unsigned MDParamKindID = Ctx.getMDKindID(SpirvParamDecorMdKind);
576573

577574
// Let's process all the globals
578575
for (auto &GV : M.globals()) {
@@ -594,19 +591,18 @@ PreservedAnalyses CompileTimePropertiesPass::run(Module &M,
594591
// of the variable.
595592
if (isDeviceGlobalVariable(GV)) {
596593
auto HostAccessDecorValue =
597-
GV.hasAttribute(SYCL_HOST_ACCESS_ATTR)
598-
? getAttributeAsInteger<uint32_t>(GV, SYCL_HOST_ACCESS_ATTR)
599-
: SPIRV_HOST_ACCESS_DEFAULT_VALUE;
594+
GV.hasAttribute(SyclHostAccessAttr)
595+
? getAttributeAsInteger<uint32_t>(GV, SyclHostAccessAttr)
596+
: SpirvHostAccessDefaultValue;
600597
auto VarName = getGlobalVariableUniqueId(GV);
601-
MDOps.push_back(buildSpirvDecorMetadata(Ctx, SPIRV_HOST_ACCESS_DECOR,
598+
MDOps.push_back(buildSpirvDecorMetadata(Ctx, SpirvHostAccessDecor,
602599
HostAccessDecorValue, VarName));
603600
}
604601

605602
if (isHostPipeVariable(GV)) {
606603
auto VarName = getGlobalVariableUniqueId(GV);
607-
MDOps.push_back(buildSpirvDecorMetadata(Ctx, SPIRV_HOST_ACCESS_DECOR,
608-
SPIRV_HOST_ACCESS_DEFAULT_VALUE,
609-
VarName));
604+
MDOps.push_back(buildSpirvDecorMetadata(
605+
Ctx, SpirvHostAccessDecor, SpirvHostAccessDefaultValue, VarName));
610606
}
611607

612608
// Add the generated metadata to the variable
@@ -668,26 +664,25 @@ PreservedAnalyses CompileTimePropertiesPass::run(Module &M,
668664
for (const Attribute &Attribute : F.getAttributes().getFnAttrs()) {
669665
// Handle pipelined attribute as a special case.
670666
if (Attribute.isStringAttribute() &&
671-
Attribute.getKindAsString() == SYCL_PIPELINED_ATTR) {
667+
Attribute.getKindAsString() == SyclPipelinedAttr) {
672668
auto PipelineOrInitiationInterval =
673669
getAttributeAsInteger<int32_t>(Attribute);
674670
MDNode *SPIRVMetadata;
675671
if (PipelineOrInitiationInterval < 0) {
676672
// Default pipelining desired
677673
SPIRVMetadata =
678-
buildSpirvDecorMetadata(Ctx, SPIRV_PIPELINE_ENABLE_DECOR, 1);
674+
buildSpirvDecorMetadata(Ctx, SpirvPipelineEnableDecor, 1);
679675
} else if (PipelineOrInitiationInterval == 0) {
680676
// No pipelining desired
681677
SPIRVMetadata =
682-
buildSpirvDecorMetadata(Ctx, SPIRV_PIPELINE_ENABLE_DECOR, 0);
678+
buildSpirvDecorMetadata(Ctx, SpirvPipelineEnableDecor, 0);
683679
} else {
684680
// Pipelining desired, with specified Initiation Interval
685681
SPIRVMetadata =
686-
buildSpirvDecorMetadata(Ctx, SPIRV_PIPELINE_ENABLE_DECOR, 1);
682+
buildSpirvDecorMetadata(Ctx, SpirvPipelineEnableDecor, 1);
687683
MDOps.push_back(SPIRVMetadata);
688-
SPIRVMetadata =
689-
buildSpirvDecorMetadata(Ctx, SPIRV_INITIATION_INTERVAL_DECOR,
690-
PipelineOrInitiationInterval);
684+
SPIRVMetadata = buildSpirvDecorMetadata(
685+
Ctx, SpirvInitiationIntervalDecor, PipelineOrInitiationInterval);
691686
}
692687
MDOps.push_back(SPIRVMetadata);
693688
} else if (MDNode *SPIRVMetadata =
@@ -946,7 +941,7 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation(
946941

947942
if (CacheProp) {
948943
LLVMContext &Ctx = M.getContext();
949-
unsigned MDKindID = Ctx.getMDKindID(SPIRV_DECOR_MD_KIND);
944+
unsigned MDKindID = Ctx.getMDKindID(SpirvDecorMdKind);
950945
if (!FPGAProp && llvm::isa<llvm::Instruction>(IntrInst->getArgOperand(0))) {
951946
// If there are no annotations other than cache controls we can apply the
952947
// controls to the pointer and remove the intrinsic.

sycl/doc/EnvironmentVariables.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ older hardware or when SYCL_UR_USE_LEVEL_ZERO_V2=0 is set.</span>
272272
| Environment variable | Values | Description | Adapter Support |
273273
| -------------------- | ------ | ----------- | --------------- |
274274
| `UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD` | Integer | By default, copy operations submitted to any queue can be offloaded to dedicated copy engines. Setting this variable instructs the driver to keep all copy operations on the engine behind the original queue. The default value is 0. | V2 |
275+
| `UR_L0_V2_DISABLE_ZE_LAUNCH_KERNEL_WITH_ARGS` | Integer | By default, `zeCommandListAppendLaunchKernelWithArguments()` will be called. Setting this variable instructs the adapter to not call `zeCommandListAppendLaunchKernelWithArguments()` and use the old path using `zeCommandListAppendLaunchKernel()`. The default value is 0. | V2 |
276+
| `UR_L0_V2_FORCE_BATCHED` | Any(\*) | Adds the `UR_QUEUE_FLAG_SUBMISSION_BATCHED` flag to the flags passed to `urQueueCreate`. The variable does not overwrite other passed flags, therefore invalid combinations (such as setting both `UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` and `UR_QUEUE_FLAG_SUBMISSION_BATCHED`) are possible. | V2 |
275277
| `SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE` | Integer | A single-threaded app has an opportunity to enable this mode to avoid overhead from mutex locking in the Level Zero adapter. A value greater than 0 enables single thread mode. A value of 0 disables single thread mode. The default is 0. | Legacy |
276278
| `SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR` | [EnableBuffers][;[MaxPoolSize][;[host\|device\|shared:][MaxPoolableSize][,[Capacity][,SlabMinSize]]]...] | EnableBuffers enables pooling for SYCL buffers, default 1, set to 0 to disable. MaxPoolSize is the maximum size of the pool, by default there is no size limit. MemType is host, device, shared or read_only_shared. Other parameters are values specified as positive integers with optional K, M or G suffix. MaxPoolableSize is the maximum allocation size that may be pooled, default 0 for shared, 2MB for host, 4MB for device and read_only_shared. Capacity is the number of allocations in each size range freed by the program but retained in the pool for reallocation, default 4. Size ranges follow this pattern: 64, 96, 128, 192, and so on, i.e., powers of 2, with one range in between. SlabMinSize is the minimum allocation size, 64KB for host and device, 2MB for shared and read_only_shared. Example: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=1;32M;host:1M,4,64K;device:1M,4,64K;shared:0,0,2M| Legacy and V2 |
277279
| `SYCL_PI_LEVEL_ZERO_BATCH_SIZE` | Integer | Sets a preferred number of compute commands to batch into a command list before executing the command list. A value of 0 causes the batch size to be adjusted dynamically. A value greater than 0 specifies fixed size batching, with the batch size set to the specified value. The default is 0. | Legacy |
@@ -292,6 +294,8 @@ older hardware or when SYCL_UR_USE_LEVEL_ZERO_V2=0 is set.</span>
292294
| `SYCL_PI_LEVEL_ZERO_USM_RESIDENT` | Integer | Bit-mask controls if/where to make USM allocations resident at the time of allocation. Input value is of the form 0xHSD, where 4-bits of D control device allocations, 4-bits of S control shared allocations, and 4-bits of H control host allocations. Each 4-bit component is holding one of the following values: "0" - then no special residency is forced, "1" - then allocation is made resident at the device of allocation, or "2" - then allocation is made resident on all devices in the context of allocation that have P2P access to the device of allocation. Default is 0x002, i.e. force full residency for device allocations only. | Legacy |
293295
| `SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D` | Integer | When set to a positive value enables the use of Level Zero USM 2D memory copy operations. Default is 0. | Legacy |
294296

297+
`(*) Note: Any means this environment variable is effective when set to any non-null value.`
298+
295299
## Debugging variables for CUDA Adapter
296300

297301
:warning: **Warning:** <span style="color:red">the environment variables

0 commit comments

Comments
 (0)