
Kinect for Windows in Processing 5

This is a more or less finished version of the Kinect for Processing library. It includes the basic skeleton tracking, the RGB image, the depth image, and a mask image. I shall move the related posts to the research page, with more documentation on the data structures and method descriptions. Stay tuned.
 

 
The sample Processing code:

import pKinect.PKinect;
import pKinect.SkeletonData;
 
PKinect kinect;
PFont font;
ArrayList<SkeletonData> bodies;
PImage img;
 
void setup()
{
  size(640, 480);
  background(0);
  kinect = new PKinect(this);
  bodies = new ArrayList<SkeletonData>();
  smooth();
  font = loadFont("LucidaSans-18.vlw");
  textFont(font, 18);
  textAlign(CENTER);
  img = loadImage("background.png");
}
 
void draw()
{
  background(0);
  image(kinect.GetImage(), 320, 0, 320, 240);   // RGB image, top right
  image(kinect.GetDepth(), 320, 240, 320, 240); // depth image, bottom right
  image(img, 0, 240, 320, 240);                 // background picture, bottom left
  image(kinect.GetMask(), 0, 240, 320, 240);    // player mask drawn over the background
  for (int i=0; i<bodies.size(); i++) 
  {
    drawSkeleton(bodies.get(i));
    drawPosition(bodies.get(i));
  }
}
 
void mousePressed() 
{
  println(frameRate);
}
 
void drawPosition(SkeletonData _s) 
{
  noStroke();
  fill(0, 100, 255);
  String s1 = str(_s.dwTrackingID);
  text(s1, _s.position.x*width/2, _s.position.y*height/2);
}
 
void drawSkeleton(SkeletonData _s) 
{
  // Body
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HEAD, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER, 
  PKinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_LEFT, 
  PKinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SPINE, 
  PKinect.NUI_SKELETON_POSITION_HIP_CENTER);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_CENTER, 
  PKinect.NUI_SKELETON_POSITION_HIP_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_CENTER, 
  PKinect.NUI_SKELETON_POSITION_HIP_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_LEFT, 
  PKinect.NUI_SKELETON_POSITION_HIP_RIGHT);
 
  // Left Arm
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_LEFT, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_LEFT, 
  PKinect.NUI_SKELETON_POSITION_WRIST_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_WRIST_LEFT, 
  PKinect.NUI_SKELETON_POSITION_HAND_LEFT);
 
  // Right Arm
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_WRIST_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_WRIST_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_HAND_RIGHT);
 
  // Left Leg
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_LEFT, 
  PKinect.NUI_SKELETON_POSITION_KNEE_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_KNEE_LEFT, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_LEFT, 
  PKinect.NUI_SKELETON_POSITION_FOOT_LEFT);
 
  // Right Leg
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_KNEE_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_KNEE_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_FOOT_RIGHT);
}
 
void DrawBone(SkeletonData _s, int _j1, int _j2) 
{
  noFill();
  stroke(255, 255, 0);
  if (_s.skeletonPositionTrackingState[_j1] != PKinect.NUI_SKELETON_POSITION_NOT_TRACKED &&
    _s.skeletonPositionTrackingState[_j2] != PKinect.NUI_SKELETON_POSITION_NOT_TRACKED) {
    line(_s.skeletonPositions[_j1].x*width/2, 
    _s.skeletonPositions[_j1].y*height/2, 
    _s.skeletonPositions[_j2].x*width/2, 
    _s.skeletonPositions[_j2].y*height/2);
  }
}
 
// A new skeleton appears: add it to the list of tracked bodies.
void appearEvent(SkeletonData _s) 
{
  if (_s.trackingState == PKinect.NUI_SKELETON_NOT_TRACKED) 
  {
    return;
  }
  synchronized(bodies) {
    bodies.add(_s);
  }
}
 
// An existing skeleton disappears: remove it from the list.
void disappearEvent(SkeletonData _s) 
{
  synchronized(bodies) {
    for (int i=bodies.size()-1; i>=0; i--) 
    {
      if (_s.dwTrackingID == bodies.get(i).dwTrackingID) 
      {
        bodies.remove(i);
      }
    }
  }
}
 
// An existing skeleton moves: copy the updated data (_a) into the stored entry matched by _b.
void moveEvent(SkeletonData _b, SkeletonData _a) 
{
  if (_a.trackingState == PKinect.NUI_SKELETON_NOT_TRACKED) 
  {
    return;
  }
  synchronized(bodies) {
    for (int i=bodies.size()-1; i>=0; i--) 
    {
      if (_b.dwTrackingID == bodies.get(i).dwTrackingID) 
      {
        bodies.get(i).copy(_a);
        break;
      }
    }
  }
}

You can download the example and the library here.

Kinect for Windows in Processing 4

This is the new version of the Kinect for Processing library, using the Kinect for Windows 1.5 SDK. In this version, I implement only the skeleton tracking, without the RGB and depth images of the earlier versions yet. For the field, method, and constant names, I try to use the same ones as in the SDK. It can track multiple skeletons and comes with three events (a minimal handler sketch follows the list):

  • appear (a new skeleton enters the screen)
  • disappear (an existing skeleton leaves the screen)
  • move (an existing skeleton moves within the screen)
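
Condensed from the full example that follows, a sketch only needs to declare these three handlers; the full example keeps an ArrayList of SkeletonData in sync inside them (hence the synchronized blocks), while the minimal version below just logs the tracking ID.

import pKinect.PKinect;
import pKinect.SkeletonData;
 
PKinect kinect;
 
void setup()
{
  size(640, 480);
  kinect = new PKinect(this);
}
 
void draw()
{
  background(0);
}
 
// A new skeleton enters the screen.
void appearEvent(SkeletonData _s)
{
  println("appear: " + _s.dwTrackingID);
}
 
// An existing skeleton leaves the screen.
void disappearEvent(SkeletonData _s)
{
  println("disappear: " + _s.dwTrackingID);
}
 
// An existing skeleton moves; _b carries the previous data and _a the updated data.
void moveEvent(SkeletonData _b, SkeletonData _a)
{
  println("move: " + _a.dwTrackingID);
}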

 

Here is the example Processing code.

import pKinect.PKinect;
import pKinect.SkeletonData;
 
PKinect kinect;
PFont font;
ArrayList<SkeletonData> bodies;
 
void setup()
{
  size(640, 480);
  background(0);
  kinect = new PKinect(this);
  bodies = new ArrayList<SkeletonData>();
  smooth();
  font = loadFont("LucidaSans-18.vlw");
  textFont(font, 18);
  textAlign(CENTER);
}
 
void draw()
{
  fill(0, 0, 0, 16);
  rect(0, 0, width, height);
  for (int i=0; i<bodies.size(); i++) 
  {
    drawSkeleton(bodies.get(i));
    drawPosition(bodies.get(i));
  }
}
 
void drawPosition(SkeletonData _s) 
{
  noStroke();
  fill(0, 255, 255);
  String s1 = str(_s.dwTrackingID);
  text(s1, _s.position.x*width, _s.position.y*height);
}
 
void drawSkeleton(SkeletonData _s) 
{
  // Body
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HEAD, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_CENTER, 
  PKinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_LEFT, 
  PKinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SPINE, 
  PKinect.NUI_SKELETON_POSITION_HIP_CENTER);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_CENTER, 
  PKinect.NUI_SKELETON_POSITION_HIP_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_CENTER, 
  PKinect.NUI_SKELETON_POSITION_HIP_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_LEFT, 
  PKinect.NUI_SKELETON_POSITION_HIP_RIGHT);
 
  // Left Arm
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_LEFT, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_LEFT, 
  PKinect.NUI_SKELETON_POSITION_WRIST_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_WRIST_LEFT, 
  PKinect.NUI_SKELETON_POSITION_HAND_LEFT);
 
  // Right Arm
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ELBOW_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_WRIST_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_WRIST_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_HAND_RIGHT);
 
  // Left Leg
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_LEFT, 
  PKinect.NUI_SKELETON_POSITION_KNEE_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_KNEE_LEFT, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_LEFT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_LEFT, 
  PKinect.NUI_SKELETON_POSITION_FOOT_LEFT);
 
  // Right Leg
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_HIP_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_KNEE_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_KNEE_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_RIGHT);
  DrawBone(_s, 
  PKinect.NUI_SKELETON_POSITION_ANKLE_RIGHT, 
  PKinect.NUI_SKELETON_POSITION_FOOT_RIGHT);
}
 
void DrawBone(SkeletonData _s, int _j1, int _j2) 
{
  noFill();
  stroke(255, 200, 0);
  if (_s.skeletonPositionTrackingState[_j1] != PKinect.NUI_SKELETON_POSITION_NOT_TRACKED &&
    _s.skeletonPositionTrackingState[_j2] != PKinect.NUI_SKELETON_POSITION_NOT_TRACKED) {
    line(_s.skeletonPositions[_j1].x*width, 
    _s.skeletonPositions[_j1].y*height, 
    _s.skeletonPositions[_j2].x*width, 
    _s.skeletonPositions[_j2].y*height);
  }
}
 
void appearEvent(SkeletonData _s) 
{
  if (_s.trackingState == PKinect.NUI_SKELETON_NOT_TRACKED) 
  {
    return;
  }
  synchronized(bodies) {
    bodies.add(_s);
  }
  println("appearing ..." + _s.dwTrackingID);
}
 
void disappearEvent(SkeletonData _s) 
{
  synchronized(bodies) {
    for (int i=bodies.size()-1; i>=0; i--) 
    {
      if (_s.dwTrackingID == bodies.get(i).dwTrackingID) 
      {
        bodies.remove(i);
      }
    }
  }
  println("Disappearing ... " + _s.dwTrackingID);
}
 
void moveEvent(SkeletonData _b, SkeletonData _a) 
{
  if (_a.trackingState == PKinect.NUI_SKELETON_NOT_TRACKED) 
  {
    return;
  }
  synchronized(bodies) {
    for (int i=bodies.size()-1; i>=0; i--) 
    {
      if (_b.dwTrackingID == bodies.get(i).dwTrackingID) 
      {
        bodies.get(i).copy(_a);
        break;
      }
    }
  }
}

 
The Processing code and library can be downloaded here.

Kinect for Windows in Processing 3

Finally, the skeleton part of the library is done. In this very experimental version, I extract only one skeleton and store the joint information in an array (size 20) of PVector in Processing. The tracking state is not implemented yet; I use the z-depth value to indicate whether a joint is validly tracked. The x and y values are normalized to the range 0 to 1 in screen space. In the next version, I would like to implement asynchronous tracking events. It is time to integrate all three components:

  1. Individual RGB and depth images
  2. Aligned RGB and depth image
  3. Skeleton tracking

 

 
Here is a copy of the sample Processing code.

import pKinect.PKinect;
 
PKinect kinect;
PVector [] loc;
 
void setup()
{
  size(640, 480);
  kinect = new PKinect(this);
  smooth();
  noFill();
  stroke(255, 255, 0);
}
 
void draw()
{
  background(0);
  loc = kinect.getSkeleton();
  drawSkeleton();
}
 
void drawSkeleton()
{
  // Body
  DrawBone(kinect.NUI_SKELETON_POSITION_HEAD,
  kinect.NUI_SKELETON_POSITION_SHOULDER_CENTER);
  DrawBone(kinect.NUI_SKELETON_POSITION_SHOULDER_CENTER,
  kinect.NUI_SKELETON_POSITION_SHOULDER_LEFT);
  DrawBone(kinect.NUI_SKELETON_POSITION_SHOULDER_CENTER,
  kinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT);
  DrawBone(kinect.NUI_SKELETON_POSITION_SHOULDER_CENTER,
  kinect.NUI_SKELETON_POSITION_SPINE);
  DrawBone(kinect.NUI_SKELETON_POSITION_SPINE,
  kinect.NUI_SKELETON_POSITION_HIP_CENTER);
  DrawBone(kinect.NUI_SKELETON_POSITION_HIP_CENTER,
  kinect.NUI_SKELETON_POSITION_HIP_LEFT);
  DrawBone(kinect.NUI_SKELETON_POSITION_HIP_CENTER,
  kinect.NUI_SKELETON_POSITION_HIP_RIGHT);
 
  // Left Arm
  DrawBone(kinect.NUI_SKELETON_POSITION_SHOULDER_LEFT,
  kinect.NUI_SKELETON_POSITION_ELBOW_LEFT);
  DrawBone(kinect.NUI_SKELETON_POSITION_ELBOW_LEFT,
  kinect.NUI_SKELETON_POSITION_WRIST_LEFT);
  DrawBone(kinect.NUI_SKELETON_POSITION_WRIST_LEFT,
  kinect.NUI_SKELETON_POSITION_HAND_LEFT);
 
  // Right Arm
  DrawBone(kinect.NUI_SKELETON_POSITION_SHOULDER_RIGHT,
  kinect.NUI_SKELETON_POSITION_ELBOW_RIGHT);
  DrawBone(kinect.NUI_SKELETON_POSITION_ELBOW_RIGHT,
  kinect.NUI_SKELETON_POSITION_WRIST_RIGHT);
  DrawBone(kinect.NUI_SKELETON_POSITION_WRIST_RIGHT,
  kinect.NUI_SKELETON_POSITION_HAND_RIGHT);
 
  // Left Leg
  DrawBone(kinect.NUI_SKELETON_POSITION_HIP_LEFT,
  kinect.NUI_SKELETON_POSITION_KNEE_LEFT);
  DrawBone(kinect.NUI_SKELETON_POSITION_KNEE_LEFT,
  kinect.NUI_SKELETON_POSITION_ANKLE_LEFT);
  DrawBone(kinect.NUI_SKELETON_POSITION_ANKLE_LEFT,
  kinect.NUI_SKELETON_POSITION_FOOT_LEFT);
 
  // Right Leg
  DrawBone(kinect.NUI_SKELETON_POSITION_HIP_RIGHT,
  kinect.NUI_SKELETON_POSITION_KNEE_RIGHT);
  DrawBone(kinect.NUI_SKELETON_POSITION_KNEE_RIGHT,
  kinect.NUI_SKELETON_POSITION_ANKLE_RIGHT);
  DrawBone(kinect.NUI_SKELETON_POSITION_ANKLE_RIGHT,
  kinect.NUI_SKELETON_POSITION_FOOT_RIGHT);
}
 
void DrawBone(int _s, int _e)
{
  if (loc == null)
  {
    return;
  }
  PVector p1 = loc[_s];
  PVector p2 = loc[_e];
  if (p1.z == 0.0 || p2.z == 0.0)
  {
    return;
  }
  line(p1.x*width, p1.y*height, p2.x*width, p2.y*height);
}

This version of the code (for Windows 7, both 32-bit and 64-bit) is available here.

Kinect for Windows in Processing 2

After the first post of the Kinect for Windows library, this is the second version of Kinect for Windows in Processing. I modified the pixels array to contain only the colour pixels of the players; the rest is painted green as background. In this version, I align the colour and depth pixels in one output frame buffer using the NuiImageGetColorPixelCoordinateFrameFromDepthPixelFrameAtResolution method.
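
There is no separate listing for this version, but a minimal sketch of how the aligned, player-masked frame might be displayed is shown below. It assumes the library exposes the masked frame through a GetMask()-style accessor, as the later version above does; the actual accessor name in this build may differ.

import pKinect.PKinect;
 
PKinect kinect;
 
void setup()
{
  size(640, 480);
  background(0);
  // The depth-to-colour alignment happens inside the library.
  kinect = new PKinect(this);
}
 
void draw()
{
  // Hypothetical accessor: assumed to return the 640 x 480 frame with
  // non-player pixels painted green.
  image(kinect.GetMask(), 0, 0);
}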
 

 
The Processing code is here. The DLL is in the code folder of the sketch. In this version, I used Windows 7 32-bit and Java 7. I'll consolidate the build for both 32-bit and 64-bit once the interface is more stable.

I use the Kinect for Windows 1.5 SDK.

Kinect for Windows in Processing 1

Finally, I have started to work on a Processing library for the Kinect for Windows SDK. This very preliminary version shows only the colour image and the depth image. For the depth image, I convert the original 13-bit depth map into a PImage for easy display. Both images are 640 x 480 in size.

You can download it to try. It was created in Windows 7 64-bit with Java 7. More work to come!

The code is very straightforward.

import pKinect.PKinect;
 
PKinect kinect;
 
void setup()
{
  size(1280, 480);
  background(0);
  kinect = new PKinect(this);
}
 
void draw()
{
  image(kinect.GetImage(), 0, 0);
  image(kinect.GetDepth(), 640, 0);
}
 
void mousePressed()
{
  println(frameRate);
}
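
For reference, the conversion of a raw 13-bit depth reading into a PImage pixel mentioned above boils down to mapping each depth value onto a 0-255 grey level. The function below is a sketch of that idea only, not the library's internal code; the 800-4000 mm working range is an assumption about the Kinect depth data.

// Sketch of the idea: map one raw depth sample (in mm) to a grey colour.
// The 800-4000 mm range is an assumed working range, not taken from the library.
color depthToGray(int rawDepth)
{
  if (rawDepth < 800 || rawDepth > 4000)
  {
    return color(0);                            // out of range: black
  }
  float b = map(rawDepth, 800, 4000, 255, 0);   // nearer points appear brighter
  return color(b);
}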

OpenCL Particles System Example in Processing

The second example is adapted from the Million Particles example in the MSAOpenCL library. I developed it with the Processing 2.0 alpha, without using that library or the Pointer class. All the memory buffers are standard java.nio.Buffer instances. The performance is very acceptable, with one million particles on an ATI Radeon HD 4670 graphics card.
 

 
The Processing source

import processing.opengl.*;
import javax.media.opengl.*;
import javax.media.opengl.glu.GLU;
import java.nio.FloatBuffer;
import java.nio.ByteBuffer;
 
import com.nativelibs4java.opencl.*;
import com.nativelibs4java.opencl.CLMem.Usage;
 
final int PARTICLES_COUNT = 1000000;
float halfWidth, halfHeight;
 
GL2 gl;
PGL pgl;
 
int [] vbo = new int[1];
 
CLContext context;
CLQueue queue;
CLKernel kernel;
 
CLBuffer<Float> partMem;
FloatBuffer partBuf;
 
CLBuffer<Byte> posMem;
ByteBuffer partPos;
 
PVector mousePos;
 
void setup() {
  size(screenWidth, screenHeight, OPENGL);
  background(0);
  randomSeed(millis());
  halfWidth = width/2;
  halfHeight = height/2;
  mousePos = new PVector(float(mouseX) - halfWidth, halfHeight - float(mouseY));
 
  PGraphicsOpenGL pg = (PGraphicsOpenGL) g;
  pgl = pg.beginPGL();
  gl = pgl.gl.getGL().getGL2();
  gl.glClearColor(0, 0, 0, 1);
  gl.glClear(GL.GL_COLOR_BUFFER_BIT);
  gl.glEnable(GL2.GL_POINT_SMOOTH);
  gl.glPointSize(2f);
 
  initOpenCL();
  pg.endPGL();
}
 
void initOpenCL() {
  context = JavaCL.createContextFromCurrentGL(); // OpenCL context sharing the current OpenGL context
  queue = context.createDefaultQueue();
 
  partBuf = FloatBuffer.allocate(PARTICLES_COUNT * 4);
  partPos = ByteBuffer.allocateDirect(PARTICLES_COUNT * 2 * Float.SIZE/8).order(context.getByteOrder());
  FloatBuffer tmpPos = partPos.asFloatBuffer();
 
  for (int i = 0; i < PARTICLES_COUNT; i++) {
 
    partBuf.put(0.0f);
    partBuf.put(0.0f);
    partBuf.put(random(0.2, 2.0));
    partBuf.put(0.0f);
 
    tmpPos.put(random(width));
    tmpPos.put(random(height));
  }
 
  partBuf.rewind();
  partPos.rewind();
 
  gl.glGenBuffers(1, vbo, 0);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, vbo[0]);
 
  gl.glBufferData(GL.GL_ARRAY_BUFFER, (int) partPos.capacity(), partPos, GL2.GL_DYNAMIC_COPY);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, 0);
 
  posMem = context.createBufferFromGLBuffer(Usage.InputOutput, vbo[0]);
  partMem = context.createFloatBuffer(Usage.InputOutput, partBuf, true);
 
  String pgmSrc = join(loadStrings(dataPath("Particle.cl")), "\n");
  CLProgram program = context.createProgram(pgmSrc);
  kernel = program.build().createKernel("updateParticle");
  kernel.setArg(0, partMem);
  kernel.setArg(1, posMem);
  kernel.setArg(2, new float [] {
    mousePos.x, mousePos.y
  }
  );
}
 
void draw() {
  background(0);
  gl.glMatrixMode(GL2.GL_PROJECTION);
  gl.glLoadIdentity();
  pgl.glu.gluOrtho2D(-halfWidth - 1, halfWidth + 1, -halfHeight - 1, halfHeight + 1);
  gl.glMatrixMode(GL2.GL_MODELVIEW);
  gl.glColor3f(1.0f, 0.8f, 0.0f);
  gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, vbo[0]);
  queue.finish();
  gl.glEnableClientState(GL2.GL_VERTEX_ARRAY);
  gl.glVertexPointer(2, GL.GL_FLOAT, 0, 0);
  gl.glDrawArrays(GL2.GL_POINTS, 0, PARTICLES_COUNT);
 
  gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, 0);
  gl.glDisableClientState(GL2.GL_VERTEX_ARRAY);
  callKernel();
}
 
void callKernel() {
  mousePos.set(float(mouseX) - halfWidth, halfHeight - float(mouseY), 0.0f);
  CLEvent kernelCompletion;
  synchronized(kernel) {
    posMem.acquireGLObject(queue);    // OpenCL takes over the shared GL buffer
    kernel.setArg(2, new float [] {
      mousePos.x, mousePos.y
    }
    );
    int [] globalSizes = new int[] {
      PARTICLES_COUNT
    };
    kernelCompletion = kernel.enqueueNDRange(queue, globalSizes);
    posMem.releaseGLObject(queue);    // hand the buffer back to OpenGL for drawing
  }
}

 
The kernel source

#define DAMP			0.95f
#define CENTER_FORCE		0.005f
#define MOUSE_FORCE		200.0f
#define MIN_SPEED		0.2f
 
typedef struct{
	float2 vel;
	float mass;
	float dummy;		
} Particle;
 
__kernel void updateParticle(__global Particle* particles, 
	__global float2* posBuffer, 
	const float2 mousePos)
{
	int id = get_global_id(0);
	__global Particle *p = &particles[id];
 
	float2 diff = mousePos - posBuffer[id];
	float invDistSQ = 1.0f / dot(diff, diff);
	diff *= (MOUSE_FORCE * invDistSQ);
 
	p->vel += -posBuffer[id] * CENTER_FORCE - diff * p->mass;
 
	float speed2 = dot(p->vel, p->vel);
	if (speed2 < MIN_SPEED) 
		posBuffer[id] = mousePos + diff * (1 + p->mass);
 
	posBuffer[id] += p->vel;
	p->vel *= DAMP;
}

OpenCL Particles System with Processing

I ported the particles demo program from JavaCL to Processing 2.0 alpha. It has reasonable performance on my iMac, up to 500,000 particles at twenty-something frames per second. The video was captured with the QuickTime screen recording, which runs much slower than the live on-screen version.
 

import processing.opengl.*;
import javax.media.opengl.*;
import javax.media.opengl.glu.GLU;
import java.util.Random;
 
import com.nativelibs4java.opencl.*;
import com.nativelibs4java.opencl.CLMem.Usage;
import org.bridj.Pointer;
 
import static org.bridj.Pointer.*;
 
final int particlesCount = 200000;
 
GL2 gl;
PGL pgl;
 
int [] vbo = new int[1];
 
CLContext context;
CLQueue queue;
 
Pointer<Float> velocities;
CLKernel updateParticleKernel;
 
CLBuffer<Float> massesMem, velocitiesMem;
CLBuffer<Byte> interleavedColorAndPositionsMem;
Pointer<Byte> interleavedColorAndPositionsTemp;
 
int elementSize = 4*4;
 
void setup() {
  size(800, 600, OPENGL);
  background(0);
  randomSeed(millis());
 
  PGraphicsOpenGL pg = (PGraphicsOpenGL) g;
  pgl = pg.beginPGL();
  gl = pgl.gl.getGL().getGL2();
  gl.glClearColor(0, 0, 0, 1);
  gl.glClear(GL.GL_COLOR_BUFFER_BIT);
  gl.glEnable(GL.GL_BLEND);
  gl.glEnable(GL2.GL_POINT_SMOOTH);
  gl.glPointSize(1f);
  initOpenCL();
  pg.endPGL();
}
 
void initOpenCL() {
  context = JavaCL.createContextFromCurrentGL();
  queue = context.createDefaultQueue();
 
  Pointer<Float> masses = allocateFloats(particlesCount).order(context.getByteOrder());
  velocities = allocateFloats(2 * particlesCount).order(context.getByteOrder());
  interleavedColorAndPositionsTemp = allocateBytes(elementSize * particlesCount).order(context.getByteOrder());
 
  Pointer<Float> positionsView = interleavedColorAndPositionsTemp.as(Float.class);
  for (int i = 0; i < particlesCount; i++) {
    masses.set(i, 0.5f + 0.5f * random(1));
    velocities.set(i * 2, random(-0.5, 0.5) * 0.2f);
    velocities.set(i * 2 + 1, random(-0.5, 0.5) * 0.2f);
    int colorOffset = i * elementSize;
    int posOffset = i * (elementSize / 4) + 1;
    byte r = (byte) 220, g = r, b = r, a = r;
    interleavedColorAndPositionsTemp.set(colorOffset++, r);
    interleavedColorAndPositionsTemp.set(colorOffset++, g);
    interleavedColorAndPositionsTemp.set(colorOffset++, b);
    interleavedColorAndPositionsTemp.set(colorOffset, a);
    float x = random(-0.5, 0.5) * width/2.0, 
    y = random(-0.5, 0.5) * height/2.0;
    positionsView.set(posOffset, (float) x);
    positionsView.set(posOffset + 1, (float) y);
  }
  velocitiesMem = context.createBuffer(Usage.InputOutput, velocities, false);
  massesMem = context.createBuffer(Usage.Input, masses, true);
 
  gl.glGenBuffers(1, vbo, 0);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, vbo[0]);
  gl.glBufferData(GL.GL_ARRAY_BUFFER, (int) interleavedColorAndPositionsTemp.getValidBytes(), interleavedColorAndPositionsTemp.getByteBuffer(), GL2.GL_DYNAMIC_COPY);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, 0);
 
  interleavedColorAndPositionsMem = context.createBufferFromGLBuffer(Usage.InputOutput, vbo[0]);
  String pgmSrc = join(loadStrings(dataPath("ParticlesDemoProgram.cl")), "\n");
  CLProgram program = context.createProgram(pgmSrc);
  updateParticleKernel = program.build().createKernel("updateParticle");
  callKernel();
}
 
void draw() {
  queue.finish();
  gl.glClear(GL.GL_COLOR_BUFFER_BIT);
  gl.glBlendFunc(GL.GL_SRC_ALPHA, GL.GL_SRC_COLOR);
  gl.glMatrixMode(GL2.GL_PROJECTION);
  gl.glLoadIdentity();
  pgl.glu.gluOrtho2D(-width/2 - 1, width/2 + 1, -height/2 - 1, height/2 + 1);
  gl.glMatrixMode(GL2.GL_MODELVIEW);
 
  gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, vbo[0]);
  gl.glInterleavedArrays(GL2.GL_C4UB_V2F, elementSize, 0);
  gl.glDrawArrays(GL.GL_POINTS, 0, particlesCount);
 
  gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, 0);
  callKernel();
}
 
void callKernel() {
  CLEvent kernelCompletion;
  synchronized(updateParticleKernel) {
    interleavedColorAndPositionsMem.acquireGLObject(queue);
    updateParticleKernel.setArgs(massesMem, 
    velocitiesMem, 
    interleavedColorAndPositionsMem.as(Float.class), 
    new float[] {
      mouseX-width/2, height/2-mouseY
    }
    , 
    new float[] {
      width, height
    }
    , 
    2.0, 
    2.0, 
    0.9, 
    0.8, 
    (byte) 0);
 
    int [] globalSizes = new int[] {
      particlesCount
    };
    kernelCompletion = updateParticleKernel.enqueueNDRange(queue, globalSizes);
    interleavedColorAndPositionsMem.releaseGLObject(queue);
  }
}

 
Here is the OpenCL source.

// Ported to JavaCL/OpenCL4Java (+ added colors) by Olivier Chafik
 
#define REPULSION_FORCE 4.0f
#define CENTER_FORCE2 0.0005f
 
#define PI 3.1416f
 
//#pragma OpenCL cl_khr_byte_addressable_store : enable
 
uchar4 HSVAtoRGBA(float4 hsva)
{
    float h = hsva.x, s = hsva.y, v = hsva.z, a = hsva.w;
    float r, g, b;
 
        int i;
        float f, p, q, t;
        if (s == 0) {
                // achromatic (grey)
                r = g = b = v;
                return (uchar4)(r * 255, g * 255, b * 255, a * 255);
        }
        h /= 60;                        // sector 0 to 5
        i = floor( h );
        f = h - i;                      // factorial part of h
        p = v * ( 1 - s );
        q = v * ( 1 - s * f );
        t = v * ( 1 - s * ( 1 - f ) );
        switch( i ) {
                case 0:
                        r = v;
                        g = t;
                        b = p;
                        break;
                case 1:
                        r = q;
                        g = v;
                        b = p;
                        break;
                case 2:
                        r = p;
                        g = v;
                        b = t;
                        break;
                case 3:
                        r = p;
                        g = q;
                        b = v;
                        break;
                case 4:
                        r = t;
                        g = p;
                        b = v;
                        break;
                default:                // case 5:
                        r = v;
                        g = p;
                        b = q;
                        break;
        }
    return (uchar4)(r * 255, g * 255, b * 255, a * 255);
}
 
__kernel void updateParticle(
        __global float* masses,
        __global float2* velocities,
        //__global Particle* particles,
        __global float4* particles,
        //__global char* pParticles,
        const float2 mousePos,
        const float2 dimensions,
        float massFactor,
        float speedFactor,
        float slowDownFactor,
        float mouseWeight,
        char limitToScreen
) {
	int id = get_global_id(0);
 
        float4 particle = particles[id];
 
        uchar4 color = as_uchar4(particle.x);
 
        float2 position = particle.yz;
    	float2 diff = mousePos - position;
 
        float invDistSQ = 1.0f / dot(diff, diff);
	float2 halfD = dimensions / 2.0f;
        diff *= (halfD).y * invDistSQ;
 
        float mass = massFactor * masses[id];
        float2 velocity = velocities[id];
        velocity -= mass * position * CENTER_FORCE2 - diff * mass * mouseWeight;
        position += speedFactor * velocities[id];
 
        if (limitToScreen) {
            float2 halfDims = dimensions / 2.0f;
            position = clamp(position, -halfDims, halfDims);
        }
 
        float dirDot = cross((float4)(diff, (float2)0), (float4)(velocity, (float2)0)).z;
        float speed = length(velocity);
 
        float f = speed / 4 / mass;
        float hue = (dirDot < 0 ? f : f + 1) / 2;
        hue = clamp(hue, 0.0f, 1.0f) * 360;
 
        float opacity = clamp(0.1f + f, 0.0f, 1.0f);
        float saturation = mass / 2;
        float brightness = 0.6f + opacity * 0.3f;
 
        uchar4 targetColor = HSVAtoRGBA((float4)(hue, saturation, brightness, opacity));
 
        float colorSpeedFactor = min(0.01f * speedFactor, 1.0f), otherColorSpeedFactor = 1 - colorSpeedFactor;
        color = (uchar4)(
            (uchar)(targetColor.x * colorSpeedFactor + color.x * otherColorSpeedFactor),
            (uchar)(targetColor.y * colorSpeedFactor + color.y * otherColorSpeedFactor),
            (uchar)(targetColor.z * colorSpeedFactor + color.z * otherColorSpeedFactor),
            (uchar)(targetColor.w * colorSpeedFactor + color.w * otherColorSpeedFactor)
        );
 
        particle.x = as_float(color);
        particle.yz = position;
 
    	particles[id] = particle;
 
        velocity *= slowDownFactor;
        velocities[id] = velocity;
}

OpenGL and Processing 2.0

The existing OpenGL code for Processing does not work in the 2.0 alpha. Here is an example code segment I modified to use the new PGL class.

import processing.opengl.*;
import javax.media.opengl.*;
 
GL2 gl;
float t, s, c;
 
void setup() {
  size(400, 400, OPENGL);
  background(0);
  PGraphicsOpenGL pg = (PGraphicsOpenGL) g;
  PGL pgl = pg.beginPGL();
  gl = pgl.gl.getGL().getGL2();
  pg.endPGL();
  t = 0.0f;
  s = 0.0f;
  c = 0.0f;
}
 
void draw() {
  t += 0.01;
  s = sin(t);
  c = cos(t);
 
  gl.glClear(GL.GL_COLOR_BUFFER_BIT);
  gl.glBegin(GL.GL_TRIANGLES);
  gl.glColor3f(1, 0, 0);
  gl.glVertex3f(-c, -c, s);
  gl.glColor3f(0, 1, 0);
  gl.glVertex3f(0, c, 0);
  gl.glColor3f(0, 0, 1);
  gl.glVertex3f(s, -s, c);
  gl.glEnd();
}