[SYCL][PTX] ptxas fatal : Unresolved extern function '_Z17__spirv_ocl_rsqrtf'

When compiling some example code I found that there seems to be a mapping issue of one of the math functions.
This example is from the hipSYCL examples but it should work with the Intel backend either way.

```
Compiler in:
clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice bruteforce_nbody.cpp -o sycl_test
Out:
ptxas fatal   : Unresolved extern function '_Z17__spirv_ocl_rsqrtf'
clang-11: error: ptxas command failed with exit code 255 (use -v to see invocation)
```

Code:
```
/*
 * This file is part of hipSYCL, a SYCL implementation based on CUDA/HIP
 *
 * Copyright (c) 2018 Aksel Alpay
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <iostream>
#include <string>
#include <vector>
#include <fstream>
#include <cstdlib>
#include "bruteforce_nbody.hpp"
#include "model.hpp"


arithmetic_type mirror_position(const arithmetic_type mirror_pos,
                                const arithmetic_type position)
{
  arithmetic_type delta = cl::sycl::fabs(mirror_pos - position);
  return (position <= mirror_pos) ?
        mirror_pos + delta : mirror_pos - delta;
}

int get_num_iterations_per_output_step()
{
  char* val = std::getenv("NBODY_ITERATIONS_PER_OUTPUT");
  if(!val)
    return 1;
  return std::stoi(val);
}

template<class T, int Dim>
using local_accessor =
  sycl::accessor<T,Dim,
                 sycl::access::mode::read_write,
                 sycl::access::target::local>;

int main()
{
  const int iterations_per_output =
      get_num_iterations_per_output_step();

  std::vector<particle_type> particles;
  std::vector<vector_type> velocities;

  arithmetic_type particle_mass = total_mass / num_particles;

  random_particle_cloud particle_distribution0{
    vector_type{0.0f, 100.0f, 0.0f},
    vector_type{10.0f, 15.0f, 11.0f},
    particle_mass, 0.1f * particle_mass,
    vector_type{0.0f, -26.0f, 5.0f},
    vector_type{5.0f, 20.0f, 12.f}
  };


  random_particle_cloud particle_distribution1{
    vector_type{50.0f, 5.0f, 0.0f},
    vector_type{17.0f, 7.0f, 5.0f},
    particle_mass, 0.1f * particle_mass,
    vector_type{-5.f, 20.0f, 1.0f},
    vector_type{18.0f, 11.f, 8.f}
  };

  random_particle_cloud particle_distribution2{
    vector_type{-50.0f, -100.0f, 0.0f},
    vector_type{10.0f, 10.0f, 14.0f},
    particle_mass, 0.1f * particle_mass,
    vector_type{5.f, 5.0f, -1.0f},
    vector_type{10.0f, 6.f, 5.f}
  };

  particle_distribution0.sample(0.2 * num_particles,
                                particles, velocities);

  std::vector<particle_type> particles_cloud1, particles_cloud2;
  std::vector<vector_type> velocities_cloud1, velocities_cloud2;
  particle_distribution1.sample(0.4 * num_particles,
                                particles_cloud1, velocities_cloud1);

  particle_distribution2.sample(0.4 * num_particles,
                                particles_cloud2, velocities_cloud2);

  particles.insert(particles.end(),
                   particles_cloud1.begin(),
                   particles_cloud1.end());

  particles.insert(particles.end(),
                   particles_cloud2.begin(),
                   particles_cloud2.end());

  velocities.insert(velocities.end(),
                   velocities_cloud1.begin(),
                   velocities_cloud1.end());

  velocities.insert(velocities.end(),
                   velocities_cloud2.begin(),
                   velocities_cloud2.end());

  auto particles_buffer =
      sycl::buffer<particle_type, 1>{particles.data(), particles.size()};
  auto velocities_buffer =
      sycl::buffer<vector_type, 1>{velocities.data(), velocities.size()};
  auto forces_buffer =
      sycl::buffer<vector_type, 1>{sycl::range<1>{particles.size()}};

  sycl::default_selector selector;
  sycl::queue q{selector};

  auto execution_range = sycl::nd_range<1>{
      sycl::range<1>{((num_particles + local_size - 1) / local_size) * local_size},
      sycl::range<1>{local_size}
  };


  std::ofstream outputfile{"output.txt"};
  for(std::size_t t = 0; t < num_timesteps; ++t)
  {
    // Submit force calculation
    q.submit([&](sycl::handler& cgh){
      auto particles_access =
          particles_buffer.get_access<sycl::access::mode::read>(cgh);
      auto forces_access =
          forces_buffer.get_access<sycl::access::mode::discard_write>(cgh);

      auto scratch = local_accessor<particle_type, 1>{
        sycl::range<1>{local_size},
        cgh
      };

      cgh.parallel_for<class force_calculation_kernel>(execution_range,
                                                       [=](sycl::nd_item<1> tid){
        const size_t global_id = tid.get_global_id().get(0);
        const size_t local_id = tid.get_local_id().get(0);
        const size_t num_particles = particles_access.get_range()[0];
        vector_type force{0.0f};

        const particle_type my_particle =
            (global_id < num_particles) ? particles_access[global_id] : particle_type{0.0f};

        for(size_t offset = 0; offset < num_particles; offset += local_size)
        {
          if(global_id < num_particles)
            scratch[local_id] = particles_access[offset + local_id];
          else
            scratch[local_id] = particle_type{0.0f};
          tid.barrier();

          for(int i = 0; i < local_size; ++i)
          {
            const particle_type p = scratch[i];
            const vector_type p_direction = p.swizzle<0,1,2>();
            const vector_type R = p_direction - my_particle.swizzle<0,1,2>();
            // dot is not yet implemented
            const arithmetic_type r_inv =
                sycl::rsqrt(R.x()*R.x() + R.y()*R.y() + R.z()*R.z()
                                    + gravitational_softening);

            // Actually we just calculate the acceleration, not the
            // force. We only need the acceleration anyway.
            if(global_id != offset + i)
              force += static_cast<arithmetic_type>(p.w()) * r_inv * r_inv * r_inv * R;
          }

          tid.barrier();
        }

        if(global_id < num_particles)
          forces_access[global_id] = force;
      });
    });

    // Time integration
    q.submit([&](cl::sycl::handler& cgh){
      auto particles_access =
          particles_buffer.get_access<sycl::access::mode::read_write>(cgh);
      auto velocities_access =
          velocities_buffer.get_access<sycl::access::mode::read_write>(cgh);
      auto forces_access =
          forces_buffer.get_access<sycl::access::mode::read>(cgh);
      const arithmetic_type dt = ::dt;

      cgh.parallel_for<class integration_kernel>(execution_range,
                                                [=](sycl::nd_item<1> tid){
        const size_t global_id = tid.get_global_id().get(0);
        const size_t num_particles = particles_access.get_range().get(0);

        if(global_id < num_particles)
        {
          particle_type p = particles_access[global_id];
          vector_type v = velocities_access[global_id];
          const vector_type acceleration = forces_access[global_id];

          // Bring v to the current state
          v += acceleration * dt;

          // Update position
          p.x() += v.x() * dt;
          p.y() += v.y() * dt;
          p.z() += v.z() * dt;

          // Reflect particle position and invert velocities
          // if particles exit the simulation cube
          if(static_cast<arithmetic_type>(p.x()) <= -half_cube_size)
          {
            v.x() = cl::sycl::fabs(v.x());
            p.x() = mirror_position(-half_cube_size, p.x());
          }
          else if(static_cast<arithmetic_type>(p.x()) >= half_cube_size)
          {
            v.x() = -cl::sycl::fabs(v.x());
            p.x() = mirror_position(half_cube_size, p.x());
          }

          if(static_cast<arithmetic_type>(p.y()) <= -half_cube_size)
          {
            v.y() = cl::sycl::fabs(v.y());
            p.y() = mirror_position(-half_cube_size, p.y());
          }
          else if(static_cast<arithmetic_type>(p.y()) >= half_cube_size)
          {
            v.y() = -cl::sycl::fabs(v.y());
            p.y() = mirror_position(half_cube_size, p.y());
          }

          if(static_cast<arithmetic_type>(p.z()) <= -half_cube_size)
          {
            v.z() = cl::sycl::fabs(v.z());
            p.z() = mirror_position(-half_cube_size, p.z());
          }
          else if(static_cast<arithmetic_type>(p.z()) >= half_cube_size)
          {
            v.z() = -cl::sycl::fabs(v.z());
            p.z() = mirror_position(half_cube_size, p.z());
          }

          particles_access[global_id] = p;
          velocities_access[global_id] = v;
        }
      });
    });

    if(t % iterations_per_output == 0)
    {
      std::cout << "Writing output..."  << std::endl;
      auto particle_positions =
          particles_buffer.get_access<sycl::access::mode::read>();

      for(std::size_t i = 0; i < num_particles; ++i)
      {
        outputfile << particle_positions[i].x() << " "
                   << particle_positions[i].y() << " "
                   << particle_positions[i].z() << " " << i << std::endl;
      }
    }
  }
}

```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[SYCL][PTX] ptxas fatal : Unresolved extern function '_Z17__spirv_ocl_rsqrtf' #2064

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[SYCL][PTX] ptxas fatal : Unresolved extern function '_Z17__spirv_ocl_rsqrtf' #2064

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions