intel
diff --git a/‎clang/include/clang/Basic/Attr.td‎
Lines changed: 8 additions & 0 deletions b/‎clang/include/clang/Basic/Attr.td‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/AttrDocs.td‎
Lines changed: 14 additions & 0 deletions b/‎clang/include/clang/Basic/AttrDocs.td‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎clang/lib/AST/Decl.cpp‎
Lines changed: 7 additions & 0 deletions b/‎clang/lib/AST/Decl.cpp‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎clang/lib/CodeGen/CGBuiltin.cpp‎
Lines changed: 2 additions & 5 deletions b/‎clang/lib/CodeGen/CGBuiltin.cpp‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎clang/lib/CodeGen/CodeGenModule.cpp‎
Lines changed: 34 additions & 0 deletions b/‎clang/lib/CodeGen/CodeGenModule.cpp‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎clang/lib/Sema/SemaDecl.cpp‎
Lines changed: 20 additions & 0 deletions b/‎clang/lib/Sema/SemaDecl.cpp‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎clang/lib/Sema/SemaDeclAttr.cpp‎
Lines changed: 3 additions & 0 deletions b/‎clang/lib/Sema/SemaDeclAttr.cpp‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎clang/lib/Sema/SemaOverload.cpp‎
Lines changed: 35 additions & 0 deletions b/‎clang/lib/Sema/SemaOverload.cpp‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎clang/test/CodeGenSYCL/sycl-device-only.cpp‎
Lines changed: 37 additions & 0 deletions b/‎clang/test/CodeGenSYCL/sycl-device-only.cpp‎
Lines changed: 37 additions & 0 deletions
@@ -1607,6 +1607,14 @@ def SYCLDevice : InheritableAttr {
   let Documentation = [SYCLDeviceDocs];
 }
 
+def SYCLDeviceOnly : InheritableAttr {
+  let Spellings = [Clang<"sycl_device_only">];
+  let Subjects = SubjectList<[Function]>;
+  let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
+  let Documentation = [SYCLDeviceOnlyDocs];
+}
+def : MutualExclusions<[SYCLDevice, SYCLDeviceOnly]>;
+
 def SYCLGlobalVar : InheritableAttr {
   let Spellings = [GNU<"sycl_global_var">];
   let Subjects = SubjectList<[GlobalStorageNonLocalVar], ErrorDiag>;
 
@@ -4518,6 +4518,20 @@ implicitly inherit this attribute.
   }];
 }
 
+def SYCLDeviceOnlyDocs : Documentation {
+  let Category = DocCatFunction;
+  let Heading = "sycl_device_only";
+  let Content = [{
+This attribute can only be applied to functions and indicates that the function
+is only available for the device. It allows functions marked with it to
+overload existing functions without the attribute, in which case the overload
+with the attribute will be used on the device side and the overload without
+will be used on the host side. Note: as opposed to ``sycl_device``, this does
+not mark the function as being exported. The two attributes are incompatible
+and can't be used together.
+  }];
+}
+
 def RISCVInterruptDocs : Documentation {
   let Category = DocCatFunction;
   let Heading = "interrupt (RISC-V)";
 
@@ -3729,6 +3729,13 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
       !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc))
     return 0;
 
+  // SYCL doesn't have a device-side standard library. SYCLDeviceOnlyAttr may
+  // be used to provide device-side definitions of standard functions, so
+  // anything with that attribute shouldn't be treated as a builtin.
+  if (Context.getLangOpts().isSYCL() && hasAttr<SYCLDeviceOnlyAttr>()) {
+    return 0;
+  }
+
   // As AMDGCN implementation of OpenMP does not have a device-side standard
   // library, none of the predefined library functions except printf and malloc
   // should be treated as a builtin i.e. 0 should be returned for them.
 
@@ -2782,10 +2782,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       GenerateIntrinsics =
           ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
   }
-  bool IsSYCLDeviceWithoutIntrinsics =
-      getLangOpts().SYCLIsDevice &&
-      (getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN());
-  if (GenerateIntrinsics && !IsSYCLDeviceWithoutIntrinsics) {
+  if (GenerateIntrinsics) {
     switch (BuiltinIDIfNoAsmLabel) {
     case Builtin::BIacos:
     case Builtin::BIacosf:
@@ -3885,7 +3882,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_modf:
   case Builtin::BI__builtin_modff:
   case Builtin::BI__builtin_modfl:
-    if (Builder.getIsFPConstrained() || IsSYCLDeviceWithoutIntrinsics)
+    if (Builder.getIsFPConstrained())
       break; // TODO: Emit constrained modf intrinsic once one exists.
     return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf));
   case Builtin::BI__builtin_isgreater:
 
@@ -4357,6 +4357,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
     }
   }
 
+  // Don't emit 'sycl_device_only' functions in SYCL host compilation.
+  if (LangOpts.SYCLIsHost && isa<FunctionDecl>(Global) &&
+      Global->hasAttr<SYCLDeviceOnlyAttr>()) {
+    return;
+  }
+
   if (LangOpts.OpenMP) {
     // If this is OpenMP, check if it is legal to emit this global normally.
     if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
@@ -4446,6 +4452,34 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
     }
   }
 
+  // When using SYCLDeviceOnlyAttr, there can be two functions with the same
+  // mangling, the host function and the device overload. So when compiling for
+  // device we need to make sure we're selecting the SYCLDeviceOnlyAttr
+  // overload and dropping the host overload.
+  if (LangOpts.SYCLIsDevice) {
+    StringRef MangledName = getMangledName(GD);
+    auto DDI = DeferredDecls.find(MangledName);
+    // If we have an existing declaration with the same mangling for this
+    // symbol it may be a SYCLDeviceOnlyAttr case.
+    if (DDI != DeferredDecls.end()) {
+      auto *PreviousGlobal = cast<ValueDecl>(DDI->second.getDecl());
+      // If the host declaration was already processed, replace it with the
+      // device only declaration.
+      if (!PreviousGlobal->hasAttr<SYCLDeviceOnlyAttr>() &&
+          Global->hasAttr<SYCLDeviceOnlyAttr>()) {
+        DeferredDecls[MangledName] = GD;
+        return;
+      }
+
+      // If the device only declaration was already processed, skip the
+      // host declaration.
+      if (PreviousGlobal->hasAttr<SYCLDeviceOnlyAttr>() &&
+          !Global->hasAttr<SYCLDeviceOnlyAttr>()) {
+        return;
+      }
+    }
+  }
+
   // clang::ParseAST ensures that we emit the SYCL devices at the end, so
   // anything that is a device (or indirectly called) will be handled later.
   if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) {
 
@@ -1486,6 +1486,17 @@ void Sema::ActOnExitFunctionContext() {
 static bool AllowOverloadingOfFunction(const LookupResult &Previous,
                                        ASTContext &Context,
                                        const FunctionDecl *New) {
+  // SYCLDeviceOnlyAttr allows device-side overloads of SYCL functions, but it
+  // is incompatible with SYCLDeviceAttr, so don't allow overloads when both
+  // attributes are present.
+  if (Context.getLangOpts().isSYCL() &&
+      Previous.getResultKind() == LookupResultKind::Found &&
+      ((New->hasAttr<SYCLDeviceOnlyAttr>() &&
+        Previous.getFoundDecl()->hasAttr<SYCLDeviceAttr>()) ||
+       (New->hasAttr<SYCLDeviceAttr>() &&
+        Previous.getFoundDecl()->hasAttr<SYCLDeviceOnlyAttr>())))
+    return false;
+
   if (Context.getLangOpts().CPlusPlus || New->hasAttr<OverloadableAttr>())
     return true;
 
@@ -3702,6 +3713,11 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S,
     return true;
   }
 
+  // Never merge a SYCLDeviceOnlyAttr function with its host variant.
+  if (getLangOpts().isSYCL() &&
+      Old->hasAttr<SYCLDeviceOnlyAttr>() != New->hasAttr<SYCLDeviceOnlyAttr>())
+    return false;
+
   diag::kind PrevDiag;
   SourceLocation OldLocation;
   std::tie(PrevDiag, OldLocation) =
@@ -7354,6 +7370,10 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
     if (S.getLangOpts().CUDA && (D->template hasAttr<CUDADeviceAttr>() ||
                                  D->template hasAttr<CUDAHostAttr>()))
       return false;
+
+    // So does SYCL's device_only attribute.
+    if (S.getLangOpts().isSYCL() && D->template hasAttr<SYCLDeviceOnlyAttr>())
+      return false;
   }
   return D->isExternC();
 }
 
@@ -7224,6 +7224,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_SYCLDevice:
     S.SYCL().handleSYCLDeviceAttr(D, AL);
     break;
+  case ParsedAttr::AT_SYCLDeviceOnly:
+    handleSimpleAttribute<SYCLDeviceOnlyAttr>(S, D, AL);
+    break;
   case ParsedAttr::AT_SYCLScope:
     S.SYCL().handleSYCLScopeAttr(D, AL);
     break;
 
@@ -1629,6 +1629,23 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
     }
   }
 
+  // Allow overloads with SYCLDeviceOnlyAttr
+  if (SemaRef.getLangOpts().isSYCL() && (Old->hasAttr<SYCLDeviceOnlyAttr>() !=
+                                         New->hasAttr<SYCLDeviceOnlyAttr>())) {
+    // SYCLDeviceOnlyAttr and SYCLDeviceAttr functions can't overload
+    if (((New->hasAttr<SYCLDeviceOnlyAttr>() &&
+          Old->hasAttr<SYCLDeviceAttr>()) ||
+         (New->hasAttr<SYCLDeviceAttr>() &&
+          Old->hasAttr<SYCLDeviceOnlyAttr>()))) {
+      SemaRef.Diag(New->getLocation(), diag::err_redefinition)
+          << New->getDeclName();
+      SemaRef.notePreviousDefinition(Old, New->getLocation());
+      return false;
+    }
+
+    return true;
+  }
+
   // The signatures match; this is not an overload.
   return false;
 }
@@ -11020,6 +11037,15 @@ bool clang::isBetterOverloadCandidate(
            S.CUDA().IdentifyPreference(Caller, Cand2.Function);
   }
 
+  // In SYCL device compilation mode prefer the overload with the
+  // SYCLDeviceOnly attribute.
+  if (S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) {
+    if (Cand1.Function->hasAttr<SYCLDeviceOnlyAttr>() !=
+        Cand2.Function->hasAttr<SYCLDeviceOnlyAttr>()) {
+      return Cand1.Function->hasAttr<SYCLDeviceOnlyAttr>();
+    }
+  }
+
   // General member function overloading is handled above, so this only handles
   // constructors with address spaces.
   // This only handles address spaces since C++ has no other
@@ -11374,6 +11400,15 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl(
   if (S.getLangOpts().CUDA)
     CudaExcludeWrongSideCandidates(S, Candidates);
 
+  // In SYCL host compilation remove candidates marked SYCLDeviceOnly.
+  if (S.getLangOpts().SYCLIsHost) {
+    auto IsDeviceCand = [&](const OverloadCandidate *Cand) {
+      return Cand->Viable && Cand->Function &&
+             Cand->Function->hasAttr<SYCLDeviceOnlyAttr>();
+    };
+    llvm::erase_if(Candidates, IsDeviceCand);
+  }
+
   Best = end();
   for (auto *Cand : Candidates) {
     Cand->Best = false;
 
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKD
+// RUN: %clang_cc1 -fsycl-is-host -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKH
+// Test code generation for sycl_device_only attribute.
+
+// Verify that the device overload is used on device.
+//
+// CHECK-LABEL: _Z3fooi
+// CHECKH: %add = add nsw i32 %0, 10
+// CHECKD: %add = add nsw i32 %0, 20
+int foo(int a) { return a + 10; }
+__attribute__((sycl_device_only)) int foo(int a) { return a + 20; }
+
+// Use a `sycl_device` function as entry point
+__attribute__((sycl_device)) int bar(int b) { return foo(b); }
+
+// Verify that the order of declaration doesn't change the behavior.
+//
+// CHECK-LABEL: _Z7fooswapi
+// CHECKH: %add = add nsw i32 %0, 10
+// CHECKD: %add = add nsw i32 %0, 20
+__attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; }
+int fooswap(int a) { return a + 10; }
+
+// Use a `sycl_device` function as entry point.
+__attribute__((sycl_device)) int barswap(int b) { return fooswap(b); }
+
+// Verify that in extern C the attribute enables mangling.
+extern "C" {
+// CHECK-LABEL: _Z4fooci
+// CHECKH: %add = add nsw i32 %0, 10
+// CHECKD: %add = add nsw i32 %0, 20
+int fooc(int a) { return a + 10; }
+__attribute__((sycl_device_only)) int fooc(int a) { return a + 20; }
+
+// Use a `sycl_device` function as entry point.
+__attribute__((sycl_device)) int barc(int b) { return fooc(b); }
+}