From 60114f9cdce11bb6f9357437b5ccc45d13b1046c Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 6 Jun 2025 17:48:28 +0100 Subject: [PATCH] [SYCL][NativeCPU] Limit generic ABI to builtins. In #17408, NativeCPU became a target in order to be able to pick the ABI for its own libclc functions consistently, without having targets affect this. This was, and is, required to be able to use libclc independent of target and target options. However, it breaks some calls into libc. Therefore, this PR allows the calling convention to be explicitly specified, ensures it is specified for any libclc functions, and ensures it is not specified for any libc functions. Fixes the SYCL-E2E acos, cmath, and exp-std-complex tests. --- clang/include/clang/Basic/TargetInfo.h | 5 + clang/lib/Basic/Targets/NativeCPU.cpp | 1 + clang/lib/Basic/Targets/NativeCPU.h | 5 + clang/lib/CodeGen/Targets/NativeCPU.cpp | 13 + clang/lib/Sema/SemaLookup.cpp | 2 +- libclc/CMakeLists.txt | 2 - libclc/clc/include/clc/clcfunc.h | 4 + libclc/libspirv/lib/native_cpu/math/helpers.h | 14 +- libdevice/nativecpu_utils.cpp | 5 +- llvm/lib/Linker/IRMover.cpp | 7 + sycl/include/sycl/builtins.hpp | 28 +- .../sycl/detail/defines_elementary.hpp | 9 + sycl/include/sycl/group.hpp | 20 +- sycl/include/sycl/stl_wrappers/cmath | 327 +++++++++--------- 14 files changed, 259 insertions(+), 183 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 8dbacdd3703a..1c6b8a660353 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1705,6 +1705,11 @@ public: return CC_C; } + /// Gets the calling convention for OpenCL built-ins for the given target. + virtual CallingConv getOpenCLCallingConv() const { + return getDefaultCallingConv(); + } + /// Get the default atomic options. AtomicOptions getAtomicOpts() const { return AtomicOpts; } diff --git a/clang/lib/Basic/Targets/NativeCPU.cpp b/clang/lib/Basic/Targets/NativeCPU.cpp index 65db0613b7c4..276086d4aa5e 100644 --- a/clang/lib/Basic/Targets/NativeCPU.cpp +++ b/clang/lib/Basic/Targets/NativeCPU.cpp @@ -68,4 +68,5 @@ void NativeCPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { assert(Aux && "Cannot invoke setAuxTarget without a valid auxiliary target!"); copyAuxTarget(Aux); getTargetOpts() = Aux->getTargetOpts(); + resetDataLayout(Aux->getDataLayoutString()); } diff --git a/clang/lib/Basic/Targets/NativeCPU.h b/clang/lib/Basic/Targets/NativeCPU.h index cb2c71ebe39b..e5b342f49346 100644 --- a/clang/lib/Basic/Targets/NativeCPU.h +++ b/clang/lib/Basic/Targets/NativeCPU.h @@ -49,7 +49,12 @@ public: void setSupportedOpenCLOpts() override { supportAllOpenCLOpts(); } + CallingConv getOpenCLCallingConv() const override { return CC_IntelOclBicc; } + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { + if (CC == CC_IntelOclBicc) + return CCCR_OK; + if (HostTarget) return HostTarget->checkCallingConvention(CC); diff --git a/clang/lib/CodeGen/Targets/NativeCPU.cpp b/clang/lib/CodeGen/Targets/NativeCPU.cpp index 313878c008f8..9aa6f38ad38b 100644 --- a/clang/lib/CodeGen/Targets/NativeCPU.cpp +++ b/clang/lib/CodeGen/Targets/NativeCPU.cpp @@ -20,6 +20,8 @@ private: public: NativeCPUABIInfo(CodeGen::CodeGenTypes &CGT, const ABIInfo *HostABIInfo) : DefaultABIInfo(CGT), HostABIInfo(HostABIInfo) {} + + void computeInfo(CGFunctionInfo &FI) const override; }; class NativeCPUTargetCodeGenInfo : public TargetCodeGenInfo { @@ -37,6 +39,17 @@ public: }; } // namespace +void NativeCPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (HostABIInfo && + FI.getCallingConvention() != llvm::CallingConv::Intel_OCL_BI) { + HostABIInfo->computeInfo(FI); + return; + } + + DefaultABIInfo::computeInfo(FI); + FI.setEffectiveCallingConvention(llvm::CallingConv::C); +} + std::unique_ptr CodeGen::createNativeCPUTargetCodeGenInfo( CodeGenModule &CGM, std::unique_ptr HostTargetCodeGenInfo) { diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 003df2ab24ef..e74b6c2c8379 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -789,7 +789,7 @@ static void GetProgModelBuiltinFctOverloads( std::vector &FunctionList, SmallVector &RetTypes, SmallVector, 5> &ArgTypes, bool IsVariadic) { FunctionProtoType::ExtProtoInfo PI( - Context.getTargetInfo().getDefaultCallingConv()); + Context.getTargetInfo().getOpenCLCallingConv()); PI.Variadic = IsVariadic; PI.ExceptionSpec = FunctionProtoType::ExceptionSpecInfo{EST_BasicNoexcept}; diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index b4335f18f394..b83b4d01046b 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -482,8 +482,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( opt_flags -O3 "--nvvm-reflect-enable=false" ) elseif( ARCH STREQUAL amdgcn ) set( opt_flags -O3 --amdgpu-oclc-reflect-enable=false ) - elseif( ARCH STREQUAL native_cpu ) - set( opt_flags -O3 ) else() set( opt_flags -O3 ) set( MACRO_ARCH ${ARCH} ) diff --git a/libclc/clc/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h index b1a173d73496..fc712935d51b 100644 --- a/libclc/clc/include/clc/clcfunc.h +++ b/libclc/clc/include/clc/clcfunc.h @@ -22,6 +22,10 @@ #define _CLC_DEF #elif defined(CLC_CLSPV) #define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin)) +#elif defined(CLC_NATIVE_CPU) +#define _CLC_DEF __attribute__((always_inline)) __attribute__((intel_ocl_bicc)) +#undef _CLC_DECL +#define _CLC_DECL __attribute__((intel_ocl_bicc)) #else #define _CLC_DEF __attribute__((always_inline)) #endif diff --git a/libclc/libspirv/lib/native_cpu/math/helpers.h b/libclc/libspirv/lib/native_cpu/math/helpers.h index 2f4498e6cd4c..8a303462f5ac 100644 --- a/libclc/libspirv/lib/native_cpu/math/helpers.h +++ b/libclc/libspirv/lib/native_cpu/math/helpers.h @@ -15,12 +15,12 @@ // __builtin_elementwise_##NAME #ifndef IS_FABS #define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \ - _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { \ + _CLC_DEF _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { \ return __builtin_elementwise_##NAME(n); \ } #else #define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \ - _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { \ + _CLC_DEF _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { \ return __builtin_elementwise_abs(n); \ } #endif @@ -33,7 +33,9 @@ GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 16) #define GEN_UNARY_BUILTIN_T(NAME, TYPE) \ - _CLC_OVERLOAD TYPE GETNAME(NAME)(TYPE n) { return __builtin_##NAME(n); } + _CLC_DEF _CLC_OVERLOAD TYPE GETNAME(NAME)(TYPE n) { \ + return __builtin_##NAME(n); \ + } #if defined(cl_khr_fp16) #define GEN_UNARY_FP16(NAME) \ @@ -58,8 +60,8 @@ GEN_UNARY_FP64(NAME) #define GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \ - _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n1, TYPE##NUM n2, \ - TYPE##NUM n3) { \ + _CLC_DEF _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n1, TYPE##NUM n2, \ + TYPE##NUM n3) { \ return __builtin_elementwise_##NAME(n1, n2, n3); \ } @@ -71,7 +73,7 @@ GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 16) #define GEN_TERNARY_BUILTIN_T(NAME, TYPE) \ - _CLC_OVERLOAD TYPE GETNAME(NAME)(TYPE n1, TYPE n2, TYPE n3) { \ + _CLC_DEF _CLC_OVERLOAD TYPE GETNAME(NAME)(TYPE n1, TYPE n2, TYPE n3) { \ return __builtin_##NAME(n1, n2, n3); \ } diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 819fd0910858..71eacb1f6a0e 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -25,9 +25,10 @@ using __nativecpu_state = native_cpu::state; #undef DEVICE_EXTERNAL #undef DEVICE_EXTERN_C -#define DEVICE_EXTERN_C extern "C" SYCL_EXTERNAL +#define DEVICE_EXTERN_C extern "C" SYCL_EXTERNAL __attribute__((intel_ocl_bicc)) #define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline)) -#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline)) +#define DEVICE_EXTERNAL \ + SYCL_EXTERNAL __attribute__((always_inline, intel_ocl_bicc)) // Several functions are used implicitly by WorkItemLoopsPass and // PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 9ec9122555f2..45283ead6609 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1480,6 +1480,13 @@ Error IRLinker::run() { EnableTripleWarning = !SrcHasLibDeviceTriple; EnableDLWarning = !(SrcHasLibDeviceTriple && SrcHasLibDeviceDL); } + // Likewise, during SYCL Native CPU compilation we link with bitcode with a + // generic data layout, which is compatible with the concrete host data layout + // and the concrete host target that we use later on. + if (SrcTriple.isNativeCPU()) { + EnableDLWarning = false; + EnableTripleWarning = false; + } if (EnableDLWarning && (SrcM->getDataLayout() != DstM.getDataLayout())) { emitWarning("Linking two modules of different data layouts: '" + diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 8969bfddebda..45a2e12cf1aa 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -15,13 +15,13 @@ #ifdef __SYCL_DEVICE_ONLY__ extern "C" { -extern __DPCPP_SYCL_EXTERNAL void *memcpy(void *dest, const void *src, - size_t n); -extern __DPCPP_SYCL_EXTERNAL void *memset(void *dest, int c, size_t n); -extern __DPCPP_SYCL_EXTERNAL int memcmp(const void *s1, const void *s2, - size_t n); -extern __DPCPP_SYCL_EXTERNAL int rand(); -extern __DPCPP_SYCL_EXTERNAL void srand(unsigned int seed); +extern __DPCPP_SYCL_EXTERNAL_LIBC void *memcpy(void *dest, const void *src, + size_t n); +extern __DPCPP_SYCL_EXTERNAL_LIBC void *memset(void *dest, int c, size_t n); +extern __DPCPP_SYCL_EXTERNAL_LIBC int memcmp(const void *s1, const void *s2, + size_t n); +extern __DPCPP_SYCL_EXTERNAL_LIBC int rand(); +extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed); extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmax(long long int x, long long int y); extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmin(long long int x, @@ -634,15 +634,15 @@ extern __DPCPP_SYCL_EXTERNAL int __imf_vimin_s32_relu(int x, int y); } #ifdef __GLIBC__ namespace std { -extern __DPCPP_SYCL_EXTERNAL void +extern __DPCPP_SYCL_EXTERNAL_LIBC void __glibcxx_assert_fail(const char *file, int line, const char *func, const char *cond) noexcept; } // namespace std extern "C" { -extern __DPCPP_SYCL_EXTERNAL void __assert_fail(const char *expr, - const char *file, - unsigned int line, - const char *func); +extern __DPCPP_SYCL_EXTERNAL_LIBC void __assert_fail(const char *expr, + const char *file, + unsigned int line, + const char *func); } #elif defined(_WIN32) extern "C" { @@ -652,8 +652,8 @@ extern "C" { // APIs used by STL, such as _Cosh, are undocumented, even though // they are open-sourced. Recognizing them as builtins is not // straightforward currently. -extern __DPCPP_SYCL_EXTERNAL void _wassert(const wchar_t *wexpr, - const wchar_t *wfile, unsigned line); +extern __DPCPP_SYCL_EXTERNAL_LIBC void +_wassert(const wchar_t *wexpr, const wchar_t *wfile, unsigned line); } #endif #endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/detail/defines_elementary.hpp b/sycl/include/sycl/detail/defines_elementary.hpp index 17107c9216b3..ba3401496849 100644 --- a/sycl/include/sycl/detail/defines_elementary.hpp +++ b/sycl/include/sycl/detail/defines_elementary.hpp @@ -25,7 +25,12 @@ #endif // __SYCL_ALWAYS_INLINE #ifdef SYCL_EXTERNAL +#ifdef __NativeCPU__ +#define __DPCPP_SYCL_EXTERNAL SYCL_EXTERNAL __attribute__((intel_ocl_bicc)) +#define __DPCPP_SYCL_EXTERNAL_LIBC SYCL_EXTERNAL +#else #define __DPCPP_SYCL_EXTERNAL SYCL_EXTERNAL +#endif #else #ifdef __SYCL_DEVICE_ONLY__ #define __DPCPP_SYCL_EXTERNAL __attribute__((sycl_device)) @@ -35,6 +40,10 @@ #endif #endif +#ifndef __DPCPP_SYCL_EXTERNAL_LIBC +#define __DPCPP_SYCL_EXTERNAL_LIBC __DPCPP_SYCL_EXTERNAL +#endif + // Helper for enabling empty-base optimizations on MSVC. // TODO: Remove this when MSVC has this optimization enabled by default. #ifdef _MSC_VER diff --git a/sycl/include/sycl/group.hpp b/sycl/include/sycl/group.hpp index 9402d2fb83d0..be60a681e410 100644 --- a/sycl/include/sycl/group.hpp +++ b/sycl/include/sycl/group.hpp @@ -175,8 +175,18 @@ public: bool leader() const { return (get_local_linear_id() == 0); } + // Note: These signatures for parallel_for_work_item are intentionally + // non-conforming. The spec says this should take const WorkItemFunctionT &, + // but we take it by value, and rely on passing by value being done as passing + // a copy by reference (ptr byval) to ensure that the special handling in + // SYCLLowerWGScopePass to mutate the passed functor object works. + template - void parallel_for_work_item(WorkItemFunctionT Func) const { +#ifdef __NativeCPU__ + __attribute__((__intel_ocl_bicc__)) +#endif + void + parallel_for_work_item(WorkItemFunctionT Func) const { // need barriers to enforce SYCL semantics for the work item loop - // compilers are expected to optimize when possible detail::workGroupBarrier(); @@ -227,8 +237,12 @@ public: } template - void parallel_for_work_item(range flexibleRange, - WorkItemFunctionT Func) const { +#ifdef __NativeCPU__ + __attribute__((__intel_ocl_bicc__)) +#endif + void + parallel_for_work_item(range flexibleRange, + WorkItemFunctionT Func) const { detail::workGroupBarrier(); #ifdef __SYCL_DEVICE_ONLY__ range GlobalSize{ diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index c25eadf6394a..0a2a576aa1aa 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -20,154 +20,171 @@ #ifdef __SYCL_DEVICE_ONLY__ extern "C" { -extern __DPCPP_SYCL_EXTERNAL int abs(int x); -extern __DPCPP_SYCL_EXTERNAL long int labs(long int x); -extern __DPCPP_SYCL_EXTERNAL long long int llabs(long long int x); +extern __DPCPP_SYCL_EXTERNAL_LIBC int abs(int x); +extern __DPCPP_SYCL_EXTERNAL_LIBC long int labs(long int x); +extern __DPCPP_SYCL_EXTERNAL_LIBC long long int llabs(long long int x); -extern __DPCPP_SYCL_EXTERNAL div_t div(int x, int y); -extern __DPCPP_SYCL_EXTERNAL ldiv_t ldiv(long int x, long int y); -extern __DPCPP_SYCL_EXTERNAL lldiv_t lldiv(long long int x, long long int y); -extern __DPCPP_SYCL_EXTERNAL float scalbnf(float x, int n); -extern __DPCPP_SYCL_EXTERNAL double scalbn(double x, int n); -extern __DPCPP_SYCL_EXTERNAL float logf(float x); -extern __DPCPP_SYCL_EXTERNAL double log(double x); -extern __DPCPP_SYCL_EXTERNAL float expf(float x); -extern __DPCPP_SYCL_EXTERNAL double exp(double x); -extern __DPCPP_SYCL_EXTERNAL float log10f(float x); -extern __DPCPP_SYCL_EXTERNAL double log10(double x); -extern __DPCPP_SYCL_EXTERNAL float modff(float x, float *intpart); -extern __DPCPP_SYCL_EXTERNAL double modf(double x, double *intpart); -extern __DPCPP_SYCL_EXTERNAL float exp2f(float x); -extern __DPCPP_SYCL_EXTERNAL double exp2(double x); -extern __DPCPP_SYCL_EXTERNAL float expm1f(float x); -extern __DPCPP_SYCL_EXTERNAL double expm1(double x); -extern __DPCPP_SYCL_EXTERNAL int ilogbf(float x); -extern __DPCPP_SYCL_EXTERNAL int ilogb(double x); -extern __DPCPP_SYCL_EXTERNAL float log1pf(float x); -extern __DPCPP_SYCL_EXTERNAL double log1p(double x); -extern __DPCPP_SYCL_EXTERNAL float log2f(float x); -extern __DPCPP_SYCL_EXTERNAL double log2(double x); -extern __DPCPP_SYCL_EXTERNAL float logbf(float x); -extern __DPCPP_SYCL_EXTERNAL double logb(double x); -extern __DPCPP_SYCL_EXTERNAL float sqrtf(float x); -extern __DPCPP_SYCL_EXTERNAL double sqrt(double x); -extern __DPCPP_SYCL_EXTERNAL float cbrtf(float x); -extern __DPCPP_SYCL_EXTERNAL double cbrt(double x); -extern __DPCPP_SYCL_EXTERNAL float erff(float x); -extern __DPCPP_SYCL_EXTERNAL double erf(double x); -extern __DPCPP_SYCL_EXTERNAL float erfcf(float x); -extern __DPCPP_SYCL_EXTERNAL double erfc(double x); -extern __DPCPP_SYCL_EXTERNAL float tgammaf(float x); -extern __DPCPP_SYCL_EXTERNAL double tgamma(double x); -extern __DPCPP_SYCL_EXTERNAL float lgammaf(float x); -extern __DPCPP_SYCL_EXTERNAL double lgamma(double x); -extern __DPCPP_SYCL_EXTERNAL float fmodf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double fmod(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float remainderf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double remainder(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float remquof(float x, float y, int *q); -extern __DPCPP_SYCL_EXTERNAL double remquo(double x, double y, int *q); -extern __DPCPP_SYCL_EXTERNAL float nextafterf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double nextafter(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float fdimf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double fdim(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float fmaf(float x, float y, float z); -extern __DPCPP_SYCL_EXTERNAL double fma(double x, double y, double z); -extern __DPCPP_SYCL_EXTERNAL float sinf(float x); -extern __DPCPP_SYCL_EXTERNAL double sin(double x); -extern __DPCPP_SYCL_EXTERNAL float cosf(float x); -extern __DPCPP_SYCL_EXTERNAL double cos(double x); -extern __DPCPP_SYCL_EXTERNAL float tanf(float x); -extern __DPCPP_SYCL_EXTERNAL double tan(double x); -extern __DPCPP_SYCL_EXTERNAL float asinf(float x); -extern __DPCPP_SYCL_EXTERNAL double asin(double x); -extern __DPCPP_SYCL_EXTERNAL float acosf(float x); -extern __DPCPP_SYCL_EXTERNAL double acos(double x); -extern __DPCPP_SYCL_EXTERNAL float atanf(float x); -extern __DPCPP_SYCL_EXTERNAL double atan(double x); -extern __DPCPP_SYCL_EXTERNAL float powf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double pow(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float atan2f(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double atan2(double x, double y); - -extern __DPCPP_SYCL_EXTERNAL float sinhf(float x); -extern __DPCPP_SYCL_EXTERNAL double sinh(double x); -extern __DPCPP_SYCL_EXTERNAL float coshf(float x); -extern __DPCPP_SYCL_EXTERNAL double cosh(double x); -extern __DPCPP_SYCL_EXTERNAL float tanhf(float x); -extern __DPCPP_SYCL_EXTERNAL double tanh(double x); -extern __DPCPP_SYCL_EXTERNAL float asinhf(float x); -extern __DPCPP_SYCL_EXTERNAL double asinh(double x); -extern __DPCPP_SYCL_EXTERNAL float acoshf(float x); -extern __DPCPP_SYCL_EXTERNAL double acosh(double x); -extern __DPCPP_SYCL_EXTERNAL float atanhf(float x); -extern __DPCPP_SYCL_EXTERNAL double atanh(double x); -extern __DPCPP_SYCL_EXTERNAL double frexp(double x, int *exp); -extern __DPCPP_SYCL_EXTERNAL double ldexp(double x, int exp); -extern __DPCPP_SYCL_EXTERNAL double hypot(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC div_t div(int x, int y); +extern __DPCPP_SYCL_EXTERNAL_LIBC ldiv_t ldiv(long int x, long int y); +extern __DPCPP_SYCL_EXTERNAL_LIBC lldiv_t lldiv(long long int x, + long long int y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float scalbnf(float x, int n); +extern __DPCPP_SYCL_EXTERNAL_LIBC double scalbn(double x, int n); +extern __DPCPP_SYCL_EXTERNAL_LIBC float logf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double log(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float expf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double exp(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float log10f(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double log10(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float modff(float x, float *intpart); +extern __DPCPP_SYCL_EXTERNAL_LIBC double modf(double x, double *intpart); +extern __DPCPP_SYCL_EXTERNAL_LIBC float exp2f(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double exp2(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float expm1f(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double expm1(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC int ilogbf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC int ilogb(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float log1pf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double log1p(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float log2f(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double log2(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float logbf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double logb(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float sqrtf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double sqrt(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float cbrtf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double cbrt(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float erff(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double erf(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float erfcf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double erfc(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float tgammaf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double tgamma(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float lgammaf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double lgamma(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float fmodf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double fmod(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float remainderf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double remainder(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float remquof(float x, float y, int *q); +extern __DPCPP_SYCL_EXTERNAL_LIBC double remquo(double x, double y, int *q); +extern __DPCPP_SYCL_EXTERNAL_LIBC float nextafterf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double nextafter(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float fdimf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double fdim(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float fmaf(float x, float y, float z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double fma(double x, double y, double z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float sinf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double sin(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float cosf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double cos(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float tanf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double tan(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float asinf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double asin(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float acosf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double acos(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float atanf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double atan(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float powf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double pow(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float atan2f(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double atan2(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float sinhf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double sinh(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float coshf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double cosh(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float tanhf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double tanh(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float asinhf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double asinh(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float acoshf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double acosh(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC float atanhf(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double atanh(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC double frexp(double x, int *exp); +extern __DPCPP_SYCL_EXTERNAL_LIBC double ldexp(double x, int exp); +extern __DPCPP_SYCL_EXTERNAL_LIBC double hypot(double x, double y); } #ifdef __GLIBC__ extern "C" { -extern __DPCPP_SYCL_EXTERNAL float frexpf(float x, int *exp); -extern __DPCPP_SYCL_EXTERNAL float ldexpf(float x, int exp); -extern __DPCPP_SYCL_EXTERNAL float hypotf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float frexpf(float x, int *exp); +extern __DPCPP_SYCL_EXTERNAL_LIBC float ldexpf(float x, int exp); +extern __DPCPP_SYCL_EXTERNAL_LIBC float hypotf(float x, float y); // MS UCRT supports most of the C standard library but is // an exception. -extern __DPCPP_SYCL_EXTERNAL float cimagf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double cimag(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float crealf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double creal(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float cargf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double carg(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float cabsf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double cabs(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ cprojf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ cproj(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ cexpf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ cexp(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ clogf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ clog(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ cpowf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ cpow(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ csqrtf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ csqrt(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ csinhf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ csinh(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ ccoshf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ ccosh(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ ctanhf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ ctanh(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ csinf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ csin(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ ccosf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ ccos(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ ctanf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ ctan(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ cacosf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ cacos(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ cacoshf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ cacosh(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ casinf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ casin(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ casinhf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ casinh(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ catanf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ catan(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ catanhf(float __complex__ z); -extern __DPCPP_SYCL_EXTERNAL double __complex__ catanh(double __complex__ z); -extern __DPCPP_SYCL_EXTERNAL float __complex__ cpolarf(float rho, float theta); -extern __DPCPP_SYCL_EXTERNAL double __complex__ cpolar(double rho, - double theta); -extern __DPCPP_SYCL_EXTERNAL float __complex__ __mulsc3(float a, float b, - float c, float d); -extern __DPCPP_SYCL_EXTERNAL double __complex__ __muldc3(double a, double b, - double c, double d); -extern __DPCPP_SYCL_EXTERNAL float __complex__ __divsc3(float a, float b, - float c, float d); -extern __DPCPP_SYCL_EXTERNAL double __complex__ __divdc3(float a, float b, - float c, float d); +extern __DPCPP_SYCL_EXTERNAL_LIBC float cimagf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double cimag(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float crealf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double creal(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float cargf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double carg(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float cabsf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double cabs(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ cprojf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ cproj( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ cexpf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ cexp(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ clogf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ clog(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ cpowf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ cpow(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ csqrtf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ csqrt( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ csinhf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ csinh( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ ccoshf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ ccosh( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ ctanhf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ ctanh( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ csinf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ csin(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ ccosf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ ccos(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ ctanf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ ctan(double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ cacosf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ cacos( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ cacoshf( + float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ cacosh( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ casinf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ casin( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ casinhf( + float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ casinh( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ catanf(float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ catan( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ catanhf( + float __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ catanh( + double __complex__ z); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ cpolarf(float rho, + float theta); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ cpolar(double rho, + double theta); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ __mulsc3(float a, float b, + float c, float d); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ __muldc3(double a, + double b, + double c, + double d); +extern __DPCPP_SYCL_EXTERNAL_LIBC float __complex__ __divsc3(float a, float b, + float c, float d); +extern __DPCPP_SYCL_EXTERNAL_LIBC double __complex__ __divdc3(float a, float b, + float c, float d); } #elif defined(_WIN32) extern "C" { @@ -177,21 +194,21 @@ extern "C" { // APIs used by STL, such as _Cosh, are undocumented, even though // they are open-sourced. Recognizing them as builtins is not // straightforward currently. -extern __DPCPP_SYCL_EXTERNAL double _Cosh(double x, double y); -extern __DPCPP_SYCL_EXTERNAL int _dpcomp(double x, double y); -extern __DPCPP_SYCL_EXTERNAL int _dsign(double x); -extern __DPCPP_SYCL_EXTERNAL short _Dtest(double *px); -extern __DPCPP_SYCL_EXTERNAL short _dtest(double *px); -extern __DPCPP_SYCL_EXTERNAL short _Exp(double *px, double y, short eoff); -extern __DPCPP_SYCL_EXTERNAL float _FCosh(float x, float y); -extern __DPCPP_SYCL_EXTERNAL int _fdpcomp(float x, float y); -extern __DPCPP_SYCL_EXTERNAL int _fdsign(float x); -extern __DPCPP_SYCL_EXTERNAL short _FDtest(float *px); -extern __DPCPP_SYCL_EXTERNAL short _fdtest(float *px); -extern __DPCPP_SYCL_EXTERNAL short _FExp(float *px, float y, short eoff); -extern __DPCPP_SYCL_EXTERNAL float _FSinh(float x, float y); -extern __DPCPP_SYCL_EXTERNAL double _Sinh(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float _hypotf(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double _Cosh(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC int _dpcomp(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC int _dsign(double x); +extern __DPCPP_SYCL_EXTERNAL_LIBC short _Dtest(double *px); +extern __DPCPP_SYCL_EXTERNAL_LIBC short _dtest(double *px); +extern __DPCPP_SYCL_EXTERNAL_LIBC short _Exp(double *px, double y, short eoff); +extern __DPCPP_SYCL_EXTERNAL_LIBC float _FCosh(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC int _fdpcomp(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC int _fdsign(float x); +extern __DPCPP_SYCL_EXTERNAL_LIBC short _FDtest(float *px); +extern __DPCPP_SYCL_EXTERNAL_LIBC short _fdtest(float *px); +extern __DPCPP_SYCL_EXTERNAL_LIBC short _FExp(float *px, float y, short eoff); +extern __DPCPP_SYCL_EXTERNAL_LIBC float _FSinh(float x, float y); +extern __DPCPP_SYCL_EXTERNAL_LIBC double _Sinh(double x, double y); +extern __DPCPP_SYCL_EXTERNAL_LIBC float _hypotf(float x, float y); } #endif #endif // __SYCL_DEVICE_ONLY__ -- 2.47.2