OpenCL Particles System Example in Processing

The second example is adapted from the Million Particles example in the MSAOpenCL library. I developed it with the Processing 2.0 alpha version, without using that library or the Pointer class. All the memory buffers are standard java.nio.Buffer objects. The performance is very acceptable with one million particles on an ATI Radeon HD 4670 graphics card.
 

 
The Processing source

import processing.opengl.*;
import javax.media.opengl.*;
import javax.media.opengl.glu.GLU;
import java.nio.FloatBuffer;
import java.nio.ByteBuffer;
 
import com.nativelibs4java.opencl.*;
import com.nativelibs4java.opencl.CLMem.Usage;
 
// Number of particles that are simulated by the kernel and drawn each frame.
final int PARTICLES_COUNT = 1000000;
// Half of the sketch width/height; assigned in setup() and used to shift mouse
// coordinates to a centred origin and to build the orthographic projection.
float halfWidth, halfHeight;
 
// JOGL GL2 interface and Processing's PGL wrapper, both obtained in setup().
GL2 gl;
PGL pgl;
 
// Holds the single OpenGL buffer object name generated in initOpenCL().
int [] vbo = new int[1];
 
// OpenCL context, command queue and the "updateParticle" kernel,
// all created in initOpenCL().
CLContext context;
CLQueue queue;
CLKernel kernel;
 
// Per-particle state, four floats per particle; partMem is the OpenCL buffer
// created from the host-side partBuf.
CLBuffer<Float> partMem;
FloatBuffer partBuf;
 
// Particle positions, two floats per particle; partPos is the initial data
// uploaded to the OpenGL buffer vbo[0] and posMem is the OpenCL buffer created
// from that same OpenGL buffer.
CLBuffer<Byte> posMem;
ByteBuffer partPos;
 
// Mouse position relative to the sketch centre, with the y axis flipped.
PVector mousePos;
 
void setup() {
  size(screenWidth, screenHeight, OPENGL);
  background(0);
  randomSeed(millis());
  halfWidth = width/2;
  halfHeight = height/2;
  mousePos = new PVector(float(mouseX) - halfWidth, halfHeight - float(mouseY));
 
  PGraphicsOpenGL pg = (PGraphicsOpenGL) g;
  pgl = pg.beginPGL();
  gl = pgl.gl.getGL().getGL2();
  gl.glClearColor(0, 0, 0, 1);
  gl.glClear(GL.GL_COLOR_BUFFER_BIT);
  gl.glEnable(GL2.GL_POINT_SMOOTH);
  gl.glPointSize(2f);
 
  initOpenCL();
  pg.endPGL();
}
 
void initOpenCL() {
  context = JavaCL.createContextFromCurrentGL();
  queue = context.createDefaultQueue();
 
  partBuf = FloatBuffer.allocate(PARTICLES_COUNT * 4);
  partPos = ByteBuffer.allocateDirect(PARTICLES_COUNT * 2 * Float.SIZE/8).order(context.getByteOrder());
  FloatBuffer tmpPos = partPos.asFloatBuffer();
 
  for (int i = 0; i < PARTICLES_COUNT; i++) {
 
    partBuf.put(0.0f);
    partBuf.put(0.0f);
    partBuf.put(random(0.2, 2.0));
    partBuf.put(0.0f);
 
    tmpPos.put(random(width));
    tmpPos.put(random(height));
  }
 
  partBuf.rewind();
  partPos.rewind();
 
  gl.glGenBuffers(1, vbo, 0);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, vbo[0]);
 
  gl.glBufferData(GL.GL_ARRAY_BUFFER, (int) partPos.capacity(), partPos, GL2.GL_DYNAMIC_COPY);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, 0);
 
  posMem = context.createBufferFromGLBuffer(Usage.InputOutput, vbo[0]);
  partMem = context.createFloatBuffer(Usage.InputOutput, partBuf, true);
 
  String pgmSrc = join(loadStrings(dataPath("Particle.cl")), "\n");
  CLProgram program = context.createProgram(pgmSrc);
  kernel = program.build().createKernel("updateParticle");
  kernel.setArg(0, partMem);
  kernel.setArg(1, posMem);
  kernel.setArg(2, new float [] {
    mousePos.x, mousePos.y
  }
  );
}
 
// Renders the current particle positions as GL points, then queues the next
// simulation step through callKernel().
void draw() {
  background(0);
  // Orthographic projection centred on the origin, padded by one unit on
  // every side of the half-width/half-height extents.
  gl.glMatrixMode(GL2.GL_PROJECTION);
  gl.glLoadIdentity();
  pgl.glu.gluOrtho2D(-halfWidth - 1, halfWidth + 1, -halfHeight - 1, halfHeight + 1);
  gl.glMatrixMode(GL2.GL_MODELVIEW);
  gl.glColor3f(1.0f, 0.8f, 0.0f);
  gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, vbo[0]);
  // Wait for the OpenCL commands queued so far (the previous callKernel())
  // to complete before the position buffer is drawn.
  queue.finish();
  gl.glEnableClientState(GL2.GL_VERTEX_ARRAY);
  // Two floats per vertex, tightly packed, read from offset 0 of the bound buffer.
  gl.glVertexPointer(2, GL.GL_FLOAT, 0, 0);
  gl.glDrawArrays(GL2.GL_POINTS, 0, PARTICLES_COUNT);
 
  gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, 0);
  gl.glDisableClientState(GL2.GL_VERTEX_ARRAY);
  // Queue the next simulation step; draw() waits for it on the next frame.
  callKernel();
}
 
/**
 * Queues one simulation step on the OpenCL command queue.
 *
 * Refreshes the centred mouse position, hands the shared position buffer to
 * OpenCL, enqueues the "updateParticle" kernel over all particles and
 * releases the buffer again. The call does not wait for the kernel; draw()
 * does that through queue.finish().
 */
void callKernel() {
  mousePos.set(float(mouseX) - halfWidth, halfHeight - float(mouseY), 0.0f);
  synchronized(kernel) {
    // OpenCL requires pending GL commands that use a shared object to have
    // completed before it is acquired; draw() has just issued glDrawArrays
    // on this buffer.
    gl.glFinish();
    posMem.acquireGLObject(queue);
    kernel.setArg(2, new float [] {
      mousePos.x, mousePos.y
    }
    );
    // One work-item per particle; the returned event is not needed here.
    kernel.enqueueNDRange(queue, new int[] {
      PARTICLES_COUNT
    }
    );
    posMem.releaseGLObject(queue);
  }
}

 
The kernel source

/* Factor the velocity is multiplied by at the end of every kernel step. */
#define DAMP			0.95f
/* Scale of the term that pulls each particle towards the origin. */
#define CENTER_FORCE		0.005f
/* Scale of the mouse term, which is divided by the squared distance to the mouse. */
#define MOUSE_FORCE		200.0f
/* Threshold compared against the SQUARED speed in the kernel. */
#define MIN_SPEED		0.2f
 
/* Per-particle state: four floats, in the same order the host sketch writes
   them into its particle buffer (vel.x, vel.y, mass, padding). */
typedef struct{
	float2 vel;		/* current velocity */
	float mass;		/* scales the mouse term for this particle */
	float dummy;		/* unused by the kernel; the host writes 0 here */
} Particle;
 
/*
 * Advances one particle by one simulation step.
 *
 * particles  per-particle velocity and mass, indexed by global id
 * posBuffer  per-particle position, indexed by global id (read and written)
 * mousePos   mouse position in the same coordinate system as posBuffer
 *
 * The velocity gains a pull towards the origin and a mouse term that falls
 * off with the squared distance to the mouse. When the squared speed drops
 * below MIN_SPEED the particle is repositioned relative to the mouse. The
 * position then advances by the velocity and the velocity is damped.
 */
__kernel void updateParticle(__global Particle* particles, 
	__global float2* posBuffer, 
	const float2 mousePos)
{
	/* Lower bound for the squared distance: without it a particle lying
	   exactly on the mouse position divides by zero, the mouse term becomes
	   NaN and the particle's velocity and position stay NaN forever. */
	const float minDistSQ = 1e-6f;

	int id = get_global_id(0);
	__global Particle *p = &particles[id];

	float2 pos = posBuffer[id];
	float2 vel = p->vel;
	const float mass = p->mass;

	float2 diff = mousePos - pos;
	float invDistSQ = 1.0f / fmax(dot(diff, diff), minDistSQ);
	diff *= (MOUSE_FORCE * invDistSQ);

	vel += -pos * CENTER_FORCE - diff * mass;

	/* Note: MIN_SPEED is compared against the squared speed. */
	float speed2 = dot(vel, vel);
	if (speed2 < MIN_SPEED) 
		pos = mousePos + diff * (1 + mass);

	pos += vel;
	vel *= DAMP;

	posBuffer[id] = pos;
	p->vel = vel;
}

3 Replies to “OpenCL Particles System Example in Processing”

  1. hi there,
    com.nativelibs4java.opencl.CLMem isn’t found by the code
    any idea?

    the lib seems to have changed a lot.

  2. Hi

    That is impressive.

    I tried to run your source code but I get this kind of runtime error:

    Failed to create an OpenCL context based on the current OpenGL context
    or
    Failed to create an OpenGL-sharing-enabled OpenCL context out of devices [GeForce GT 330M, Intel(R) Core(TM) i5 CPU M 520 @ 2.40GHz]

    I’m on MBP/SnowLeopard with GeForce GT 330M, P55 2.05a running in 64bit

    Do you have any hint about these errors?
    Thanks a lot

    Fabio

Comments are closed.