Need help with optimizing pattern (ported from shader)

Hi all,

I’ve ported a pattern based on this shader but am getting < 1 FPS on a Pico.

Does anyone have any ideas on how to optimize? Is it possible to profile performance?

Thanks in advance!

export var t1;

export function beforeRender(delta) {
  t1 = time(.1);
}

var log_step = 79. / 53.;

function uv(p) {
  return p + 2.0 * perlin(p * 0.1 + t1, 0, 0, 0);
}

function draw(p) {
  var d = log_step * 2.0;
  var u = uv(p);
  var ctr = 0.0;
  var lid = getLocalID(u, d, ctr);
  var gid = getGlobalID(lid, d);
  return getRandomColor(gid);
}

function quantize(d) {
  return pow(log_step, floor(log(d) / log(log_step)));
}

function d2level(d) {
  return floor(log(d) / log(log_step));
}

function level2d(level) {
  return pow(log_step, round(level));
}

function interpolate(d) {
  var level = d2level(d);
  var df = level2d(level);
  var dc = level2d(level - 1);
  var linear = (d - df) / (df - dc) / log_step;
  return smoothstep(0.0, 1.0, linear);
}

function spacing(d) {
  return 1.0 / d;
}

function id2Lborder(id, d) {
  return (id + (d / spacing(d))) * spacing(d);
}

function id2center(id, d) {
  return (id + 0.5 + (d / spacing(d))) * spacing(d);
}

function u2id(u, d) {
  return floor(u / spacing(d) - (d / spacing(d)));
}

function u2closestLBorderId(u, d) {
  var lid = u2id(u, d);
  if (u < id2center(lid, d)) {
    return lid;
  } else {
    return lid + 1;
  }
}

function u2closestBorder(u, d) {
  return id2Lborder(u2closestLBorderId(u, d), d);
}

function getLocalID(u, d, ctr) {
  var level = d2level(d);
  var df = level2d(level);
  var dc = level2d(level - 1);
  var lid = u2id(u, df);
  var lf_brdr = id2Lborder(lid, df);
  var rf_brdr = id2Lborder(lid + 1, df);
  var lc_brdr = u2closestBorder(lf_brdr, dc);
  var rc_brdr = u2closestBorder(rf_brdr, dc);
  var a = interpolate(d);
  var left = lf_brdr * a + lc_brdr * (1.0 - a);
  var right = rf_brdr * a + rc_brdr * (1.0 - a);
  if (u < left) lid--;
  else if (u > right) lid++;

  if (abs(left - right) < 0.5 * spacing(df)) {
    ctr = -99999.0;
  } else {
    ctr = (left + right) / 2.0;
  }
  return lid;
}

function setId(id, bit) {
  if (bit > 127) return id;
  if (bit < 64) {
    if (bit < 32) {
      return [id[0] | 1 << bit, id[1] | 1 << bit, id[2] | 1 << bit, id[3] | 1 << bit];
    } else {
      return [id[0] | 1 << (bit - 32), id[1] | 1 << (bit - 32), id[2] | 1 << (bit - 32), id[3]];
    }
  } else {
    if (bit < 96) {
      return [id[0], id[1] | 1 << (bit - 64), id[2], id[3]];
    } else {
      return [id[0] | 1 << (bit - 96), id[1], id[2], id[3]];
    }
  }
}

function getGlobalID(lid, d) {
  var gid = [0, 0, 0, 0];

  var start_level = d2level(d);
  for (var level = start_level; level > 0; level--) {
    var df = level2d(level);
    var dc = level2d(level - 1);
    var lf_brdr = id2Lborder(lid, df);
    var rf_brdr = id2Lborder(lid + 1, df);
    var lid_brdr = u2closestLBorderId(lf_brdr, dc);
    var rid_brdr = u2closestLBorderId(rf_brdr, dc);
    if (lid_brdr == rid_brdr) {
      gid = setId(gid, 96 - level);
      lid = u2id(lf_brdr, dc);
    } else {
      lid = lid_brdr;
    }
  }
  gid[0] = lid;
  return gid;
}

function paletteAt(t) {
  var palette = [0.500, 0.500, 0.500, 0.500, 0.500, 0.500, 1.000, 1.000, 1.000, 0.000, 0.333, 0.667];
  _r = palette[0] + palette[3] * cos(PI2 * ((palette[6] * t) + palette[9]));
  _g = palette[1] + palette[4] * cos(PI2 * ((palette[7] * t) + palette[10]));
  _b = palette[2] + palette[5] * cos(PI2 * ((palette[8] * t) + palette[11]));
  return rgb(_r, _g, _b);
}

function getRandomColor(id) {
  return paletteAt(frac(sin(id[0] * 12.9898) * 43758.5453123));
}

export function render(index) {
  draw(15 * index / pixelCount);
}

Hi, and welcome!

I applaud your ambition! Doing good shader ports is far from the easiest thing in the world. Here are a few things to consider:

With shadertoy especially, you’re looking at showpieces for systems with at least a couple of orders of magnitude more computing power and pixels. Most shaders actually look awful at the usual LED matrix resolutions, and some are just too compute intensive to run on an ESP32.

So my personal inclination is to avoid doing direct ports. Instead, I look at the shader code and try to figure out how to efficiently duplicate the effect on a Pixelblaze.

In this case, what the shader is doing is roughly this: it draws vertical columns in various colors, and then distorts the x and y coordinates with a sinusoidal function to create the stretching, squashing and movement effects. It could definitely be simplified a bit for LEDs. But for now, on to the existing code…

Before taking on optimization, there are some differences between the Pixelblaze’s language and “regular” Javascript that you should be aware of.

The most important thing is that Pixelblaze doesn’t have memory management. You can allocate arrays from a heap, but there’s no way to free them, and there is no garbage collection.

So it’s best to allocate them up front as as global variables, and just pass the pointers around. If you allocate arrays in functions that are called repeatedly, you’ll run out of memory pretty much instantly.

For example, at around line 112, in getGlobalID(), you’d need to move
var gid = [0, 0, 0, 0];
up to the top with the rest of the initialization code, to avoid allocating an array for each pixel in every frame.

Similarly, in the function setId() at around line 105, each time you’re returning an array with
return [x,y,z,w]
it’s implicitly allocating a new array. I’d suggest passing in a pointer to the previously allocated id array, and setting the members individually.

Other things to note:

Pixelblaze uses one numerical type - a 16.16 fixed point float. The range is something like -32765.9999 to 32765.9999. This shader uses some constants that exceed that range.

A couple that were easy to spot - in the calculation for getRandomColor(), and the -99,999 in getLocalID(). You might also need to rescale some calculations, but that can be done once everything else is working.

You’ll also need to be sure that variable scope is working as you need it to. It looks like there are some cases where it might not be doing the right thing. From the Pixelblaze docs:

Explicitly declared variables will either be global or local depending on where they are declared. Local variables declared using var inside a function are visible inside that function. Local variables can shadow global variables with the same name.
Implicitly defined variables are always globally scoped, even if first assigned inside a function. e.g.: function(){bar=123} will define bar as a global while function(){var baz=123} defines baz as a local variable since it uses the var keyword inside a function.

(there’s more, but I feel I’m getting extremely long winded so I’ll stop for now-- please feel free to ask questions if you have them! :slight_smile: )

1 Like

Hi, thank you for the thoughtful response! Good point about taking the shader code as inspiration/starting off point to efficiently duplicate the effect on a Pixelblaze.

As for the optimization tips, allocating arrays up front did the trick! I’m up to 24 FPS now :slight_smile:

2 Likes

All great points @zranger1!

Consider using arrayReplace(array, ...) / array.replace(...), which is an API I added to efficiently populate / overwrite array elements for exactly this kind of thing, much faster than assigning each element one at a time.

  gid.replace(x,y,z,w)
3 Likes

Just to be sure that this could really be done the way I was thinking, I wrote a short pattern that does a “Pixelblaze-ified” version of what that original shader is doing. It requires a 2D display and a working map. Runs at 64 fps on my 16x16 matrix setup.

Wavy Bands Pattern
// Wavy Bands
//
// Requires a correctly configured 2D map
//
// MIT License - use this code to make more cool things!
//
// 6/22/2023 ZRanger1

// number of displayed columns. Could easily have a UI control
export var nColumns = 4

timebase = 0;
export function beforeRender(delta) {
  timebase = (timebase + delta / 1000) % 3600
  
  tx = -timebase / 6    // speed of x axis movement
  ty = timebase / 2    // speed of y axis movement
}

export function render2D(index,x,y) {
  // distort y coord with perlin noise to vary width of individual columns
  // (constant multipliers are hand-tuned)
  y -= 0.3 * perlin(x * 2, y * 2, ty, 1.618) 
  
  // distort x cord to create wave patterns
  x += 0.1752 * sin(4 * (tx + y))
  
  // quantize color into the specified number of column bins
  h = floor(x * nColumns)
  
  // the original shader colors column edges black. Here, we darken
  // and antialias them, which looks better at low resolution
  v = (x * nColumns - 0.5)
  v =  1-(2*abs(v - h));

  // calculate the final column color, adjust brightness
  // gradient a bit and display the pixel
  hsv(h / nColumns, 0.9, pow(v,1.25))
}
3 Likes

That’s awesome! Thanks for putting this together. It’s a great example of translating the original shader visuals into a simpler form.

2 Likes

This topic was automatically closed 120 days after the last reply. New replies are no longer allowed.