diff --git a/llvm/lib/SYCLLowerIR/CompileTimeProperties.def b/llvm/lib/SYCLLowerIR/CompileTimeProperties.def index e2ba9e80e0d82..02417d366fbff 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimeProperties.def +++ b/llvm/lib/SYCLLowerIR/CompileTimeProperties.def @@ -60,3 +60,8 @@ SYCL_COMPILE_TIME_PROPERTY("sycl-bi-directional-ports-true", 5885, // SPIR-V Spec: https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/INTEL/SPV_INTEL_cache_controls.asciidoc SYCL_COMPILE_TIME_PROPERTY("sycl-prefetch-hint", 6442, DecorValueTy::uint32) SYCL_COMPILE_TIME_PROPERTY("sycl-prefetch-hint-nt", 6442, DecorValueTy::uint32) + +// The corresponding SPIR-V OpCodes for cache control properties +SYCL_COMPILE_TIME_PROPERTY("sycl-cache-read-hint", 6442, DecorValueTy::uint32) +SYCL_COMPILE_TIME_PROPERTY("sycl-cache-read-assertion", 6442, DecorValueTy::uint32) +SYCL_COMPILE_TIME_PROPERTY("sycl-cache-write-hint", 6443, DecorValueTy::uint32) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index f884f403b5910..8c3707852456f 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -42,6 +42,9 @@ constexpr uint32_t SPIRV_HOST_ACCESS_DEFAULT_VALUE = 2; // Read/Write constexpr uint32_t SPIRV_INITIATION_INTERVAL_DECOR = 5917; constexpr uint32_t SPIRV_PIPELINE_ENABLE_DECOR = 5919; +constexpr uint32_t SPIRV_CACHE_CONTROL_READ_DECOR = 6442; +constexpr uint32_t SPIRV_CACHE_CONTROL_WRITE_DECOR = 6443; + enum class DecorValueTy { uint32, boolean, @@ -97,6 +100,72 @@ MDNode *buildSpirvDecorMetadata(LLVMContext &Ctx, uint32_t OpCode, return MDNode::get(Ctx, MD); } +/// Builds a metadata node for a SPIR-V decoration for cache controls +/// where decoration code and value are both uint32_t integers. +/// The value encodes a cache level and a cache control type. +/// +/// @param Ctx [in] the LLVM Context. +/// @param Name [in] the SPIR-V property string name. +/// @param OpCode [in] the SPIR-V opcode. +/// @param CacheMode [in] whether read or write. +/// @param CacheLevel [in] the cache level. +/// +/// @returns a pointer to the metadata node created for the required decoration +/// and its values. +MDNode *buildSpirvDecorCacheProp(LLVMContext &Ctx, StringRef Name, + uint32_t OpCode, uint32_t CacheMode, + uint32_t CacheLevel) { + // SPIR-V encodings of read control + enum cache_control_read_type { + read_uncached = 0, + read_cached = 1, + read_streaming = 2, + read_invalidate = 3, + read_const_cached = 4 + }; + // SPIR-V encodings of write control + enum cache_control_write_type { + write_uncached = 0, + write_through = 1, + write_back = 2, + write_streaming = 3 + }; + // SYCL encodings of read/write control. Definition of cache_mode should match + // definition in SYCL header file cache_control_properties.hpp. + enum class cache_mode { + uncached, + cached, + streaming, + invalidate, + constant, + write_through, + write_back + }; + static uint32_t SPIRVReadControl[] = {read_uncached, read_cached, + read_streaming, read_invalidate, + read_const_cached}; + static uint32_t SPIRVWriteControl[] = { + write_uncached, write_uncached, write_streaming, write_uncached, + write_uncached, write_through, write_back}; + + // Map SYCL encoding to SPIR-V + uint32_t CacheProp; + if (Name.starts_with("sycl-cache-read")) + CacheProp = SPIRVReadControl[CacheMode]; + else + CacheProp = SPIRVWriteControl[CacheMode]; + + auto *Ty = Type::getInt32Ty(Ctx); + SmallVector MD; + MD.push_back(ConstantAsMetadata::get( + Constant::getIntegerValue(Ty, APInt(32, OpCode)))); + MD.push_back(ConstantAsMetadata::get( + Constant::getIntegerValue(Ty, APInt(32, CacheLevel)))); + MD.push_back(ConstantAsMetadata::get( + Constant::getIntegerValue(Ty, APInt(32, CacheProp)))); + return MDNode::get(Ctx, MD); +} + /// Builds a metadata node for a SPIR-V decoration (decoration code /// is \c uint32_t integer and value is a string). /// @@ -625,9 +694,12 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation( // check alignment annotation and apply it to load/store parseAlignmentAndApply(M, IntrInst); - // Read the annotation values and create the new annotation string. + // Read the annotation values and create new annotation strings. std::string NewAnnotString = ""; auto Properties = parseSYCLPropertiesString(M, IntrInst); + SmallVector MDOpsCacheProp; + bool CacheProp = false; + bool FPGAProp = false; for (const auto &[PropName, PropVal] : Properties) { // sycl-alignment is converted to align on // previous parseAlignmentAndApply(), dropping here @@ -639,59 +711,118 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation( continue; uint32_t DecorCode = DecorIt->second.Code; - // Expected format is '{X}' or '{X:Y}' where X is decoration ID and - // Y is the value if present. It encloses Y in " to ensure that - // string values are handled correctly. Note that " around values are - // always valid, even if the decoration parameters are not strings. - NewAnnotString += "{" + std::to_string(DecorCode); - if (PropVal) - NewAnnotString += ":\"" + PropVal->str(); - - if (PropName == "sycl-prefetch-hint") - NewAnnotString += ",1"; // CachedINTEL - if (PropName == "sycl-prefetch-hint-nt") - NewAnnotString += ",3"; // InvalidateAfterReadINTEL - - if (PropVal) - NewAnnotString += "\""; - NewAnnotString += "}"; + // Handle cache control properties + if ((*PropName).starts_with("sycl-cache-")) { + CacheProp = true; + auto DecorValue = PropVal; + uint32_t AttrVal; + DecorValue->getAsInteger(0, AttrVal); + // Format is: + // !Annot = !{!CC1, !CC2, ...} + // !CC1 = !{i32 Load/Store, i32 Level, i32 Control} + // !CC2 = !{i32 Load/Store, i32 Level, i32 Control} + // ... + LLVMContext &Ctx = M.getContext(); + uint32_t CacheMode = 0; + while (AttrVal) { + // The attribute value encodes cache control and levels. + // Low-order to high-order nibbles hold cache levels specified for the + // enumerated SYCL cache modes. Lowest order nibble for uncached, next + // for cached, and so on. + // In each nibble cache levels are encoded as L1=1, L2=2, L3=4 and L4=8. + // The SPIR-V encoding of cache levels L1..L4 uses values 0..3. + uint32_t CacheLevel = 0; + uint32_t LevelMask = AttrVal & 0xf; + while (LevelMask) { + if (LevelMask & 1) + MDOpsCacheProp.push_back(buildSpirvDecorCacheProp( + Ctx, *PropName, DecorCode, CacheMode, CacheLevel)); + ++CacheLevel; + LevelMask >>= 1; + } + ++CacheMode; + AttrVal >>= 4; + } + } else { + FPGAProp = true; + // Expected format is '{X}' or '{X:Y}' where X is decoration ID and + // Y is the value if present. It encloses Y in " to ensure that + // string values are handled correctly. Note that " around values are + // always valid, even if the decoration parameters are not strings. + NewAnnotString += "{" + std::to_string(DecorCode); + if (PropVal) + NewAnnotString += ":\"" + PropVal->str(); + + if (PropName == "sycl-prefetch-hint") + NewAnnotString += ",1"; // CachedINTEL + if (PropName == "sycl-prefetch-hint-nt") + NewAnnotString += ",3"; // InvalidateAfterReadINTEL + + if (PropVal) + NewAnnotString += "\""; + NewAnnotString += "}"; + } } - // If the new annotation string is empty there is no reason to keep it, so - // replace it with the first operand and mark it for removal. - if (NewAnnotString.empty()) { + // If there are no other annotations (except "alignment") then there is no + // reason to keep the original intrinsic, so replace it with the first operand + // and mark it for removal. + if (!CacheProp && !FPGAProp) { IntrInst->replaceAllUsesWith(IntrInst->getOperand(0)); RemovableAnnotations.push_back(IntrInst); return true; } - // Either reuse a previously generated one or create a new global variable - // with the new annotation string. - GlobalVariable *NewAnnotStringGV = nullptr; - auto ExistingNewAnnotStringIt = ReusableAnnotStrings.find(NewAnnotString); - if (ExistingNewAnnotStringIt != ReusableAnnotStrings.end()) { - NewAnnotStringGV = ExistingNewAnnotStringIt->second; - } else { - Constant *NewAnnotStringData = - ConstantDataArray::getString(M.getContext(), NewAnnotString); - NewAnnotStringGV = new GlobalVariable( - M, NewAnnotStringData->getType(), true, GlobalValue::PrivateLinkage, - NewAnnotStringData, ".str", nullptr, llvm::GlobalValue::NotThreadLocal, - IntrAnnotStringArg->getType()->getPointerAddressSpace()); - NewAnnotStringGV->setSection(AnnotStrArgGV->getSection()); - NewAnnotStringGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - ReusableAnnotStrings.insert({NewAnnotString, NewAnnotStringGV}); + if (FPGAProp) { + // Either reuse a previously generated one or create a new global variable + // with the new annotation string. + GlobalVariable *NewAnnotStringGV = nullptr; + auto ExistingNewAnnotStringIt = ReusableAnnotStrings.find(NewAnnotString); + if (ExistingNewAnnotStringIt != ReusableAnnotStrings.end()) { + NewAnnotStringGV = ExistingNewAnnotStringIt->second; + } else { + Constant *NewAnnotStringData = + ConstantDataArray::getString(M.getContext(), NewAnnotString); + NewAnnotStringGV = new GlobalVariable( + M, NewAnnotStringData->getType(), true, GlobalValue::PrivateLinkage, + NewAnnotStringData, ".str", nullptr, + llvm::GlobalValue::NotThreadLocal, + IntrAnnotStringArg->getType()->getPointerAddressSpace()); + NewAnnotStringGV->setSection(AnnotStrArgGV->getSection()); + NewAnnotStringGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + ReusableAnnotStrings.insert({NewAnnotString, NewAnnotStringGV}); + } + + // Replace the annotation string with a bitcast of the new global variable. + IntrInst->setArgOperand( + 1, ConstantExpr::getBitCast(NewAnnotStringGV, + IntrAnnotStringArg->getType())); + + // The values are now in the annotation string, so we can remove the + // original annotation value. + PointerType *Arg4PtrTy = + cast(IntrInst->getArgOperand(4)->getType()); + IntrInst->setArgOperand(4, ConstantPointerNull::get(Arg4PtrTy)); } - // Replace the annotation string with a bitcast of the new global variable. - IntrInst->setArgOperand( - 1, ConstantExpr::getBitCast(NewAnnotStringGV, - IntrAnnotStringArg->getType())); + if (CacheProp) { + LLVMContext &Ctx = M.getContext(); + unsigned MDKindID = Ctx.getMDKindID(SPIRV_DECOR_MD_KIND); + if (!FPGAProp) { + // If there are no annotations other than cache controls we can apply the + // controls to the pointer and remove the intrinsic. + auto PtrInstr = cast(IntrInst->getArgOperand(0)); + PtrInstr->setMetadata(MDKindID, MDTuple::get(Ctx, MDOpsCacheProp)); + // Replace all uses of IntrInst with first operand + IntrInst->replaceAllUsesWith(PtrInstr); + // Delete the original IntrInst + RemovableAnnotations.push_back(IntrInst); + } else { + // If there were FPGA annotations then we retain the original intrinsic + // and apply the cache control properties to its result. + IntrInst->setMetadata(MDKindID, MDTuple::get(Ctx, MDOpsCacheProp)); + } + } - // The values are not in the annotation string, so we can remove the original - // annotation value. - PointerType *Arg4PtrTy = - cast(IntrInst->getArgOperand(4)->getType()); - IntrInst->setArgOperand(4, ConstantPointerNull::get(Arg4PtrTy)); return true; } diff --git a/sycl/doc/extensions/proposed/sycl_ext_intel_cache_controls.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_intel_cache_controls.asciidoc index adc657c348fc4..9de9df84051da 100755 --- a/sycl/doc/extensions/proposed/sycl_ext_intel_cache_controls.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_intel_cache_controls.asciidoc @@ -92,178 +92,217 @@ supports. === Properties -Below is a list of new compile-time constant properties supported with -`annotated_ptr`. - +This extension adds three properties: `read_hint`, `write_hint`, and +`read_assertion`. The value for each of these properties is a list +of `cache_control` structures. Each `cache_control` specifies a +cache mode and a list of cache levels to which that mode applies. +To illustrate, consider the following `read_hint` property: ```c++ -namespace sycl::ext::intel::experimental { +read_hint, + cache_control> +``` -enum class cache_level : /*unspecified*/ { - L1, - L2, - L3, - L4, -}; +This property indicates that read operations should be uncached at +level 1 and cached at levels 2 and 3. Cache level L1 indicates the +cache closest to the processing unit, cache level L2 indicates the +next furthest cache level, etc. It is legal to specify a cache_level +that does not exist on the target device, but the cache level will +be ignored in this case. -enum class cache_control_read_type : /* unspecified */ { - cached, - uncached, - streaming, - invalidate_after_read, - const_cached -}; +Note that a property list may contain at most one instance of any +particular property. For example, it is not valid for a property list +to contain multiple `read_hint` properties. In order to specify multiple +"read hint" cache controls in the same property list, use a single +`read_hint` property with several `cache_control` structures. +The same rule applies to `write_hint` and `read_assertion`. -enum class cache_control_write_type : /* unspecified */ { - uncached, - streaming, - write_through, - write_back -}; +==== Cache modes -struct cache_control_read_key { - template - using value_t = property_value>; -}; +The `cache_control` structure is used by each of the three properties +to specify a cache mode and a list of cache levels to which it applies. -struct cache_control_write_key { - template - using value_t = property_value>; +```c++ +namespace sycl::ext { +namespace intel::experimental { + +enum class cache_mode { + uncached, + cached, + streaming, + invalidate, + constant, + write_through, + write_back }; +using cache_level = sycl::ext::oneapi::experimental::cache_level; -template -inline constexpr cache_control_read_key::value_t cache_control_read; +template struct cache_control {}; -template -inline constexpr cache_control_write_key::value_t cache_control_write; +} // namespace intel::experimental +``` -template<> -struct is_property_key : std::true_type {}; +The allowed cache modes in `read_hint` are `uncached`, `cached` or `streaming`. +`write_hint` may be `uncached`, `streaming`, `write_through` or `write_back`. +`read_assertion` is either `invalidate` or `constant`. -template<> -struct is_property_key : std::true_type {}; +==== Cache controls -template -struct is_property_key_of< - cache_control_read_key, annotated_ptr> : std::true_type {}; +Of the cache levels specified by the cache control properties of an +`annotated_ptr`, at each level at most one cache mode of +`read_hint`/`read_assertion` type may be specified and at most one cache +mode of `write_hint` type. -template -struct is_property_key_of< - cache_control_write_key, annotated_ptr> : std::true_type {}; +Repeating a cache level within a cache control is an error. For example: +```c++ +read_hint> +``` -template -inline constexpr cache_control_read_key::value_t -cache_control_read_cached; +Specifying more than one cache mode from `read_hint`/`read_assertion` +or more than one `write_hint` type at a particular cache level is an error. +For example, specifying `cached` and `uncached` at level L2: +```c++ +read_hint, + cache_control> +``` +However, a cache mode from `read_hint`/`read_assertion` and +one from `write_hint` may be specified at the same level: +```c++ +read_hint>, +write_hint> +``` -template -inline constexpr cache_control_read_key::value_t cache_control_read_uncached; +==== Read hint property -template -inline constexpr cache_control_read_key::value_t cache_control_read_streaming; +This property is a hint requesting specific cache behavior when +loading from memory through an `annotated_ptr`. This property +can affect the performance of device code, but it does not change +the semantics. -template -inline constexpr cache_control_read_key::value_t cache_control_invalidate_after_read; +```c++ +namespace sycl::ext { +namespace intel::experimental { -template -inline constexpr cache_control_read_key::value_t cache_control_read_const_cached; +struct read_hint_key { + template + using value_t = property_value; +}; -template -inline constexpr cache_control_write_key::value_t cache_control_write_uncached; +template +inline constexpr read_hint_key::value_t read_hint; -template -inline constexpr cache_control_write_key::value_t cache_control_write_streaming; +} // namespace intel::experimental -template -inline constexpr cache_control_write_key::value_t cache_control_write_through; +namespace oneapi { +namespace experimental { -template -inline constexpr cache_control_write_key::value_t cache_control_write_back; +template <> +struct is_property_key : std::true_type {}; +template +struct is_property_key_of> : std::true_type {}; -} // namespace sycl::ext::intel::experimental +} // namespace sycl::ext::oneapi::experimental ``` -Each of these properties takes a cache level parameter indicating which level -of the cache hierarchy is affected. Cache level 0 indicates the cache closest -to the processing unit, cache level 1 indicates the next furthest cache -level, etc. It is legal to specify a cache level that does not exist on -the target device, but the property will be ignored in this case. - -Note that a property specifies the cache behavior only for the indicated -cache level. In order to specify the behavior for multiple cache levels, -multiple properties should be specified. - -It is legal to specify several different cache control properties in the -same `annotated_ptr`. However, all instances of `cache_control_read_type` must -have different cache levels and all instances of `cache_control_write_type` -must have difference cache levels. - -The cache control properties are divided into two categories: those that -are hints and those that are assertions by the application. - -==== Cache control hints -These properties are hints requesting specific cache behavior when -loading or storing to memory through the `annotated_ptr`. These properties can -affect the performance of device code, but they do not change the semantics. - -- -[options="header", cols="2,1"] +[options="header", cols="3,2"] |==== | Property | Description a| [source] ---- -cache_control_read +read_hint> ---- | This property requests that loads from memory through the `annotated_ptr` -may cache the data at level `L` in the memory hierarchy. +should not cache the data at levels `Ls` in the memory hierarchy. a| [source] ---- -cache_control_read +read_hint> ---- | This property requests that loads from memory through the `annotated_ptr` -should not cache the data at level `L` in the memory hierarchy. +may cache the data at levels `Ls` in the memory hierarchy. a| [source] ---- -cache_control_read +read_hint> ---- | This property requests that loads from memory through the `annotated_ptr` -should cache the data at cache level `L`. The eviction policy is to give +should cache the data at cache levels `Ls`. The eviction policy is to give lower priority to data cached using this property versus the `cached` property. +|==== +-- + +==== Write hint property + +This property is a hint requesting specific cache behavior when +storing to memory through an `annotated_ptr`. +This property can affect the performance of device code, but it +does not change the semantics. + +```c++ +namespace sycl::ext { +namespace intel::experimental { + +struct write_hint_key { + template + using value_t = property_value; +}; + +template +inline constexpr write_hint_key::value_t write_hint; + +} // namespace intel::experimental + +namespace oneapi { +namespace experimental { + +template <> +struct is_property_key : std::true_type {}; +template +struct is_property_key_of> : std::true_type {}; + +} // namespace sycl::ext::oneapi::experimental +``` +-- +[options="header", cols="3,2"] +|==== +| Property | Description a| [source] ---- -cache_control_write +write_hint> ---- | This property requests that writes to memory through the `annotated_ptr` -should not cache the data at level `L` in the memory hierarchy. +should not cache the data at levels `Ls` in the memory hierarchy. a| [source] ---- -cache_control_write +write_hint> ---- | This property requests that writes to memory through the `annotated_ptr` -should immediately write the data to the next-level cache after `L` -and mark the cache line at level `L` as "not dirty". +should immediately write the data to the next-level cache after `Ls` +and mark the cache line at levels `Ls` as "not dirty". a| [source] ---- -cache_control_write +write_hint> ---- | This property requests that writes to memory through the `annotated_ptr` -should write the data into the cache at level `L` and mark the cache line as +should write the data into the cache at levels `Ls` and mark the cache line as "dirty". Upon eviction, "dirty" data will be written into the cache at -level higher than `L`. +level higher than `Ls`. a| [source] ---- -cache_control_write +write_hint> ---- | This property is the same as `write_through`, but requests use of a @@ -272,20 +311,48 @@ via a `streaming` cache control. |==== -- -==== Assertions by the application -These properties are assertions by the application, promising that the -application accesses memory in a certain way. Care must be taken when -using these properties because they can lead to undefined behavior if -they are misused. +==== Read assertion property + +This property is an assertion by the application, promising that +the application accesses memory in a certain way. +Care must be taken when using this property because it can +lead to undefined behavior if it is misused. + +```c++ +namespace sycl::ext { +namespace intel::experimental { + +struct read_assertion_key { + template + using value_t = property_value; +}; + +template +inline constexpr read_assertion_key::value_t read_assertion; + +} // namespace intel::experimental + +namespace oneapi { +namespace experimental { + +template <> +struct is_property_key + : std::true_type {}; +template +struct is_property_key_of> : std::true_type {}; + +} // namespace sycl::ext::oneapi::experimental +``` -- -[options="header", cols="3,1"] +[options="header", cols="3,2"] |==== | Property | Description a| [source] ---- -cache_control_read +read_assertion> ---- | This property asserts that the cache line into which data is loaded @@ -296,7 +363,7 @@ the cache line and discard "dirty" data. If the assertion is violated a| [source] ---- -cache_control_read +read_assertion> ---- | This property asserts that the cache line containing the data @@ -307,10 +374,3 @@ is undefined. |==== -- -== Implementation notes - -It is intended that the SYCL cache control properties will be used by the -compiler to generate SPIR-V cache control operations. Alternatively, the -properties could be implemented by generating intrinsic function calls -that match the cache control types. - diff --git a/sycl/include/sycl/ext/intel/experimental/cache_control_properties.hpp b/sycl/include/sycl/ext/intel/experimental/cache_control_properties.hpp new file mode 100755 index 0000000000000..58103d39a17a0 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/cache_control_properties.hpp @@ -0,0 +1,245 @@ +//==--------- SYCL annotated_ptr properties for caching control ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext { +namespace intel { +namespace experimental { + +// SYCL encodings of read/write control. Definition of cache_mode should match +// definition in file CompileTimePropertiesPass.cpp. +enum class cache_mode { + uncached, + cached, + streaming, + invalidate, + constant, + write_through, + write_back +}; +using cache_level = sycl::ext::oneapi::experimental::cache_level; + +namespace detail { + +template static constexpr void checkLevel1() { + static_assert(count < 2, "Duplicate cache_level L1 specification"); +} +template static constexpr void checkLevel2() { + static_assert(count < 2, "Duplicate cache_level L2 specification"); +} +template static constexpr void checkLevel3() { + static_assert(count < 2, "Duplicate cache_level L3 specification"); +} +template static constexpr void checkLevel4() { + static_assert(count < 2, "Duplicate cache_level L4 specification"); +} + +} // namespace detail + +template struct cache_control { + static constexpr const auto mode = M; + static constexpr const int countL1 = ((Ls == cache_level::L1 ? 1 : 0) + ...); + static constexpr const int countL2 = ((Ls == cache_level::L2 ? 1 : 0) + ...); + static constexpr const int countL3 = ((Ls == cache_level::L3 ? 1 : 0) + ...); + static constexpr const int countL4 = ((Ls == cache_level::L4 ? 1 : 0) + ...); + static constexpr const uint32_t levels = ((1 << static_cast(Ls)) | ...); + // Starting bit position for cache levels of a cache mode are uncached=0, + // cached=4, streaming=8, invalidate=12, constant=16, write_through=20 and + // write_back=24. The shift value is computed as cache_mode * 4. + static constexpr const uint32_t encoding = + (countL1, countL2, countL3, countL4, detail::checkLevel1(), + detail::checkLevel2(), detail::checkLevel3(), + detail::checkLevel4(), levels << static_cast(M) * 4); +}; + +template +using property_value = + sycl::ext::oneapi::experimental::property_value; + +struct read_hint_key { + template + using value_t = property_value; +}; + +struct read_assertion_key { + template + using value_t = property_value; +}; + +struct write_hint_key { + template + using value_t = property_value; +}; + +template +inline constexpr read_hint_key::value_t read_hint; + +template +inline constexpr read_assertion_key::value_t read_assertion; + +template +inline constexpr write_hint_key::value_t write_hint; + +} // namespace experimental +} // namespace intel + +namespace oneapi { +namespace experimental { + +template class annotated_ptr; + +template <> +struct is_property_key : std::true_type {}; +template +struct is_property_key_of> : std::true_type {}; + +template <> +struct is_property_key + : std::true_type {}; +template +struct is_property_key_of> : std::true_type {}; + +template <> +struct is_property_key : std::true_type {}; +template +struct is_property_key_of> : std::true_type {}; + +namespace detail { + +// Values assigned to cache levels in a nibble. +static constexpr int L1BIT = 1; +static constexpr int L2BIT = 2; +static constexpr int L3BIT = 4; +static constexpr int L4BIT = 8; + +static constexpr int countL(int levels, int mask) { + return levels & mask ? 1 : 0; +} + +template +static constexpr void checkUnique() { + static_assert(countL1 < 2, "Conflicting cache_mode at L1"); + static_assert(countL2 < 2, "Conflicting cache_mode at L2"); + static_assert(countL3 < 2, "Conflicting cache_mode at L3"); + static_assert(countL4 < 2, "Conflicting cache_mode at L4"); +} + +using cache_mode = sycl::ext::intel::experimental::cache_mode; + +template static constexpr int checkReadHint() { + static_assert( + M == cache_mode::uncached || M == cache_mode::cached || + M == cache_mode::streaming, + "read_hint must specify cache_mode uncached, cached or streaming"); + return 0; +} + +template static constexpr int checkReadAssertion() { + static_assert( + M == cache_mode::invalidate || M == cache_mode::constant, + "read_assertion must specify cache_mode invalidate or constant"); + return 0; +} + +template static constexpr int checkWriteHint() { + static_assert(M == cache_mode::uncached || M == cache_mode::write_through || + M == cache_mode::write_back || M == cache_mode::streaming, + "write_hint must specify cache_mode uncached, write_through, " + "write_back or streaming"); + return 0; +} + +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::CacheControlReadHint; +}; +template <> +struct IsCompileTimeProperty + : std::true_type {}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-cache-read-hint"; + static constexpr const int value = + ((checkReadHint() + ...), + checkUnique<(countL(Cs::levels, L1BIT) + ...), + (countL(Cs::levels, L2BIT) + ...), + (countL(Cs::levels, L3BIT) + ...), + (countL(Cs::levels, L4BIT) + ...)>(), + ((Cs::encoding) | ...)); +}; + +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::CacheControlReadAssertion; +}; +template <> +struct IsCompileTimeProperty + : std::true_type {}; +template +struct PropertyMetaInfo< + intel::experimental::read_assertion_key::value_t> { + static constexpr const char *name = "sycl-cache-read-assertion"; + static constexpr const int value = + ((checkReadAssertion() + ...), + checkUnique<(countL(Cs::levels, L1BIT) + ...), + (countL(Cs::levels, L2BIT) + ...), + (countL(Cs::levels, L3BIT) + ...), + (countL(Cs::levels, L4BIT) + ...)>(), + ((Cs::encoding) | ...)); +}; + +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::CacheControlWrite; +}; +template <> +struct IsCompileTimeProperty + : std::true_type {}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-cache-write-hint"; + static constexpr const int value = + ((checkWriteHint() + ...), + checkUnique<(countL(Cs::levels, L1BIT) + ...), + (countL(Cs::levels, L2BIT) + ...), + (countL(Cs::levels, L3BIT) + ...), + (countL(Cs::levels, L4BIT) + ...)>(), + ((Cs::encoding) | ...)); +}; + +} // namespace detail + +template +struct is_valid_property> + : std::bool_constant::value> {}; + +template +struct is_valid_property< + T, intel::experimental::read_assertion_key::value_t> + : std::bool_constant::value> {}; + +template +struct is_valid_property> + : std::bool_constant::value> {}; + +} // namespace experimental +} // namespace oneapi +} // namespace ext +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp b/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp index 11f9af7a30e05..1ae1633b337ac 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -159,9 +160,22 @@ __SYCL_TYPE(annotated_ptr) annotated_ptr> { using property_list_t = detail::properties_t; // buffer_location and alignment are allowed for annotated_ref + // Cache controls are allowed for annotated_ptr using allowed_properties = std::tuple), - decltype(ext::oneapi::experimental::alignment<0>)>; + decltype(ext::oneapi::experimental::alignment<0>), + decltype(ext::intel::experimental::read_hint< + ext::intel::experimental::cache_control< + ext::intel::experimental::cache_mode::cached, + cache_level::L1>>), + decltype(ext::intel::experimental::read_assertion< + ext::intel::experimental::cache_control< + ext::intel::experimental::cache_mode::cached, + cache_level::L1>>), + decltype(ext::intel::experimental::write_hint< + ext::intel::experimental::cache_control< + ext::intel::experimental::cache_mode::cached, + cache_level::L1>>)>; using filtered_properties = typename PropertiesFilter::tuple; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 833565d9bb4f2..304356f71b30f 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -215,8 +215,11 @@ enum PropKind : uint32_t { ESIMDL2CacheHint = 45, ESIMDL3CacheHint = 46, UsmKind = 47, + CacheControlReadHint = 48, + CacheControlReadAssertion = 49, + CacheControlWrite = 50, // PropKindSize must always be the last value. - PropKindSize = 48, + PropKindSize = 51, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 3180688a0257f..dd461b7049aa7 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -92,6 +92,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_DEVICE_GLOBAL 1 #define SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST 1 #define SYCL_EXT_ONEAPI_PREFETCH 1 +#define SYCL_EXT_INTEL_CACHE_CONTROLS 1 #ifndef __has_include #define __has_include(x) 0 diff --git a/sycl/test/extensions/properties/properties_cache_control.cpp b/sycl/test/extensions/properties/properties_cache_control.cpp new file mode 100755 index 0000000000000..273079334036e --- /dev/null +++ b/sycl/test/extensions/properties/properties_cache_control.cpp @@ -0,0 +1,121 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix CHECK-IR + +#include + +using namespace sycl; +using namespace ext::oneapi::experimental; +using namespace ext::intel::experimental; + +using load_hint = annotated_ptr< + float, decltype(properties( + alignment<8>, + read_hint, + cache_control>))>; +using load_assertion = annotated_ptr< + int, + decltype(properties( + alignment<8>, + read_assertion, + cache_control>))>; +using store_hint = annotated_ptr< + float, + decltype(properties( + write_hint, + cache_control, + cache_control>))>; +using load_store_hint = annotated_ptr< + float, + decltype(properties( + read_hint>, + read_assertion>, + write_hint< + cache_control>))>; + +void cache_control_read_hint_func() { + queue q; + constexpr int N = 10; + float *ArrayA = malloc_shared(N, q); + q.submit([&](handler &cgh) { + cgh.single_task<>([=]() { + load_hint src{&ArrayA[0]}; + *src = 55.0f; + }); + }); +} + +void cache_control_read_assertion_func() { + queue q; + constexpr int N = 10; + int *ArrayA = malloc_shared(N, q); + q.submit([&](handler &cgh) { + cgh.single_task<>([=]() { + load_assertion src{&ArrayA[0]}; + *src = 66; + }); + }); +} + +void cache_control_write_hint_func() { + queue q; + constexpr int N = 10; + float *ArrayA = malloc_shared(N, q); + q.submit([&](handler &cgh) { + cgh.single_task<>([=]() { + store_hint dst{&ArrayA[0]}; + *dst = 77.0f; + }); + }); +} + +void cache_control_read_write_func() { + queue q; + constexpr int N = 10; + float *ArrayA = malloc_shared(N, q); + q.submit([&](handler &cgh) { + cgh.single_task<>([=]() { + load_store_hint dst{&ArrayA[0]}; + *dst = 77.0f; + }); + }); +} + +// CHECK-IR: spir_kernel{{.*}}cache_control_read_hint_func +// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RHINT:.*]] +// CHECK-IR: ret void + +// CHECK-IR: spir_kernel{{.*}}cache_control_read_assertion_func +// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RASSERT:.*]] +// CHECK-IR: ret void + +// CHECK-IR: spir_kernel{{.*}}cache_control_write_hint_func +// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[WHINT:.*]] +// CHECK-IR: ret void + +// CHECK-IR: spir_kernel{{.*}}cache_control_read_write_func +// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RWHINT:.*]] +// CHECK-IR: ret void + +// CHECK-IR: [[RHINT]] = !{[[RHINT1:.*]], [[RHINT2:.*]], [[RHINT3:.*]]} +// CHECK-IR: [[RHINT1]] = !{i32 6442, i32 1, i32 0} +// CHECK-IR: [[RHINT2]] = !{i32 6442, i32 2, i32 0} +// CHECK-IR: [[RHINT3]] = !{i32 6442, i32 0, i32 1} + +// CHECK-IR: [[RASSERT]] = !{[[RASSERT1:.*]], [[RASSERT2:.*]], [[RASSERT3:.*]]} +// CHECK-IR: [[RASSERT1]] = !{i32 6442, i32 1, i32 3} +// CHECK-IR: [[RASSERT2]] = !{i32 6442, i32 2, i32 3} +// CHECK-IR: [[RASSERT3]] = !{i32 6442, i32 0, i32 4} + +// CHECK-IR: [[WHINT]] = !{[[WHINT1:.*]], [[WHINT2:.*]], [[WHINT3:.*]], [[WHINT4:.*]]} +// CHECK-IR: [[WHINT1]] = !{i32 6443, i32 3, i32 3} +// CHECK-IR: [[WHINT2]] = !{i32 6443, i32 0, i32 1} +// CHECK-IR: [[WHINT3]] = !{i32 6443, i32 1, i32 2} +// CHECK-IR: [[WHINT4]] = !{i32 6443, i32 2, i32 2} + +// CHECK-IR: [[RWHINT]] = !{[[RWHINT1:.*]], [[RWHINT2:.*]], [[RWHINT3:.*]]} +// CHECK-IR: [[RWHINT1]] = !{i32 6442, i32 2, i32 1} +// CHECK-IR: [[RWHINT2]] = !{i32 6442, i32 3, i32 4} +// CHECK-IR: [[RWHINT3]] = !{i32 6443, i32 3, i32 1} diff --git a/sycl/test/extensions/properties/properties_cache_control_errors.cpp b/sycl/test/extensions/properties/properties_cache_control_errors.cpp new file mode 100755 index 0000000000000..b3de0a503f7f5 --- /dev/null +++ b/sycl/test/extensions/properties/properties_cache_control_errors.cpp @@ -0,0 +1,53 @@ +// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -ferror-limit=0 \ +// RUN: -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s + +#include + +using namespace sycl; +using namespace ext::oneapi::experimental; +using namespace ext::intel::experimental; + +using annotated_ptr_load1 = annotated_ptr< + float, decltype(properties( + read_hint>))>; +using annotated_ptr_load2 = annotated_ptr< + float, + decltype(properties( + read_hint, + cache_control>))>; +using annotated_ptr_load3 = annotated_ptr< + float, + decltype(properties( + read_hint>))>; +using annotated_ptr_load4 = + annotated_ptr>))>; +using annotated_ptr_load5 = annotated_ptr< + float, + decltype(properties( + write_hint>))>; + +void cache_control_read_func(queue q) { + float *ArrayA = malloc_shared(10, q); + q.submit([&](handler &cgh) { + cgh.single_task<>([=]() { + // expected-error@sycl/ext/intel/experimental/cache_control_properties.hpp:* {{Duplicate cache_level L3 specification}} + annotated_ptr_load1 src1{&ArrayA[0]}; + + // expected-error@sycl/ext/intel/experimental/cache_control_properties.hpp:* {{Conflicting cache_mode at L3}} + annotated_ptr_load2 src2{&ArrayA[0]}; + + // expected-error@sycl/ext/intel/experimental/cache_control_properties.hpp:* {{read_hint must specify cache_mode uncached, cached or streaming}} + annotated_ptr_load3 src3{&ArrayA[0]}; + + // expected-error@sycl/ext/intel/experimental/cache_control_properties.hpp:* {{read_assertion must specify cache_mode invalidate or constant}} + annotated_ptr_load4 src4{&ArrayA[0]}; + + // expected-error@sycl/ext/intel/experimental/cache_control_properties.hpp:* {{write_hint must specify cache_mode uncached, write_through, write_back or streaming}} + annotated_ptr_load5 src5{&ArrayA[0]}; + }); + }); +}