#version 450 core

/* All-ones marker.  main() stores it in the y and z fields of VarBuf entries and */
/* in both fields of VMap entries; ProcessTexel() returns false when the y field  */
/* of the entry it examines equals this value.                                    */
#define RESTART_IND_UINT 0xFFFFFFFF

/* Thresholds used by ProcessTexel():                                             */
/*   EPS0 - an intersection polygon with a smaller area is always rejected,       */
/*   EPS1 - an intersection polygon with a smaller area is rejected if its        */
/*          central point is closer than MG to an edge of the unit square         */
/*          [x,x+1] x [y,y+1] (or lies outside it).                               */
#define EPS0 0.001
#define EPS1 0.01
#define MG   0.1

/* one invocation per work group */
layout(local_size_x=1) in;

/* Per-element records.  As used in this file: x is a counter/flag summed by      */
/* stages 2 and 5, y is read by ProcessTexel() as a triangle number, z is the     */
/* key compared by CompSwap() and stage 4, w is unpacked by stage 6 as            */
/* (low 16 bits, high 16 bits) = (x, y).                                          */
layout(std430,binding=0) buffer VarBuf { uvec4 tvar[]; } vbuf;
/* Floats read in consecutive triples (3*i, 3*i+1, 3*i+2) per vertex index i.     */
layout(std430,binding=1) buffer TSPointBuf { float pt[]; } tsbuf;
/* One vec2 per vertex index; ProcessTexel() clips the triangle made of these     */
/* against a unit square, so presumably coordinates in texel units - TODO confirm */
layout(std430,binding=2) buffer TXPointBuf { vec2 tt[]; } txbuf;
/* Three consecutive vertex indices per triangle (read at 3*trID+i).              */
layout(std430,binding=3) buffer TIndBuf { int ind[]; } tind;
/* Written by ProcessTexel(): xyz = interpolated point, w = intersection area;    */
/* the whole entry is zero when the texel is rejected.                            */
layout(std430,binding=4) buffer CPointBuf { vec4 cp[]; } cpbuf;
/* Written by stage 6 of main(), one entry per invocation.                        */
layout(std430,binding=7) buffer VOIdBuf { uint id[]; } vobjid;
/* Read by stage 6 of main() at index y*ctl.width + x.                            */
layout(std430,binding=8) buffer ObjIdBuf { uint id[]; } objid;
/* Filled with RESTART_IND_UINT by stage 0; not referenced elsewhere in this file */
layout(std430,binding=9) buffer VMap { uvec2 px[]; } vmap;

/* Control parameters.  Only these fields are referenced in this file:            */
/*   stage   - selects the operation performed by main(),                         */
/*   step    - bit position used by PrefixSum(),                                  */
/*   width   - row length used to flatten (x,y) into a linear index,              */
/*   N       - exclusive upper bound of element indices in CompSwap() and         */
/*             PrefixSum(); also determines the partner offset in stage 2,        */
/*   H       - block size used by CompSwap(),                                     */
/*   reverse - selects how CompSwap() picks the partner element.                  */
/* The remaining fields are not used by the code visible here.                    */
uniform CtlBlock {
    int   stage, step, width, height, N, H, nrows, ncols, first, txts;
    uint  nelem, p0, mi, nnz;
    bool  reverse;
    float C;
    vec3  colour;
  } ctl;

/* Scratch storage for polygon vertices: SHClip() reads a polygon starting at     */
/* one offset and writes the clipped polygon starting at another;                 */
/* ProcessTexel() alternates between offsets 0 and 7.                             */
vec2 trv[13];

/* One clipping step against a single half-plane.                                 */
/* The input polygon is taken from trv[k], ..., trv[k+n-1]; the part of it lying  */
/* in the half-plane dot(p-cpp,cpn) >= 0 is written to trv[l], trv[l+1], ...      */
/*   cpp  - a point on the boundary line,                                         */
/*   cpn  - vector normal to the boundary, pointing into the half-plane kept,     */
/*   n    - number of input vertices,                                             */
/*   k, l - offsets in trv of the input and of the output polygon; the output is  */
/*          written while the input is still being read, so the two ranges must   */
/*          not overlap.                                                          */
/* Returns the number of vertices written.                                        */
int SHClip ( vec2 cpp, vec2 cpn, int n, int k, int l )
{
  vec2  prev = trv[k+n-1];
  float dPrev = dot ( prev-cpp, cpn );
  int   cnt = 0;

  for ( int j = 0;  j < n;  j++ ) {
    vec2  cur = trv[k+j];
    float dCur = dot ( cur-cpp, cpn );
    bool  prevIn = dPrev >= 0.0;
    bool  curIn = dCur >= 0.0;

        /* the edge prev-cur crosses the boundary: emit the crossing point */
    if ( prevIn != curIn ) {
      trv[l+cnt] = mix ( prev, cur, dPrev/(dPrev-dCur) );
      cnt++;
    }
        /* the end point of the edge is kept if it is inside */
    if ( curIn ) {
      trv[l+cnt] = cur;
      cnt++;
    }
    prev = cur;  dPrev = dCur;
  }
  return cnt;
} /*SHClip*/

/* Returns the (unsigned) area of the polygon whose n vertices are stored in      */
/* trv[0], ..., trv[n-1].  The sum of (x_i - x_{i-1})*(y_i + y_{i-1}) over all    */
/* edges, including the closing one, is twice the signed area.                    */
float PolygonArea ( int n )
{
  vec2  prev = trv[n-1];
  vec2  cur = trv[0];
  float sum = (cur.x-prev.x)*(cur.y+prev.y);   /* closing edge */

  for ( int j = 1; j < n; j++ ) {
    prev = cur;  cur = trv[j];
    sum += (cur.x-prev.x)*(cur.y+prev.y);
  }
  return abs ( 0.5*sum );
} /*PolygonArea*/

/* Returns the arithmetic mean of the n vertices stored in trv[0], ..., trv[n-1]. */
vec2 CentralPoint ( int n )
{
  vec2 sum = trv[0];
  int  j = 1;

  while ( j < n ) {
    sum += trv[j];
    j++;
  }
  return sum / float(n);
} /*CentralPoint*/

/* Processes the texel (x,y), whose linear index is y*ctl.width + x.              */
/* The triangle whose number is stored in the y field of the texel's VarBuf entry */
/* is clipped against the square [x,x+1] x [y,y+1].  If the intersection is       */
/* accepted, the central point of the intersection is mapped, via its barycentric */
/* coordinates in the triangle, to a point interpolated from the TSPointBuf       */
/* vertices; that point goes to cpbuf.cp[...].xyz and the intersection area to    */
/* cpbuf.cp[...].w.                                                               */
/* Returns true if a point has been stored; otherwise returns false and leaves    */
/* the cpbuf entry equal to zero.                                                 */
bool ProcessTexel ( int x, int y )
{
  int vi[3];
  int texel = y*ctl.width + x;

  cpbuf.cp[texel] = vec4(0.0);
  int tri = int(vbuf.tvar[texel].y);
  if ( tri == RESTART_IND_UINT )
    return false;

        /* find the vertex indices of the triangle for the given texel */
  for ( int j = 0; j < 3; j++ ) {
    vi[j] = tind.ind[3*tri+j];
    trv[j] = txbuf.tt[vi[j]];
  }

        /* find the intersection of the triangle with the texel square; */
        /* the polygon ping-pongs between offsets 0 and 7 in trv        */
  int cnt = SHClip ( vec2(float(x),0.0), vec2(1.0,0.0), 3, 0, 7 );
  if ( cnt < 3 )
    return false;
  cnt = SHClip ( vec2(0.0,float(y)), vec2(0.0,1.0), cnt, 7, 0 );
  if ( cnt < 3 )
    return false;
  cnt = SHClip ( vec2(float(x+1),0.0), vec2(-1.0,0.0), cnt, 0, 7 );
  if ( cnt < 3 )
    return false;
  cnt = SHClip ( vec2(0.0,float(y+1)), vec2(0.0,-1.0), cnt, 7, 0 );
  if ( cnt < 3 )
    return false;

        /* find the area of the intersection */
  float area = PolygonArea ( cnt );
  if ( area < EPS0 )
    return false;

        /* find the position of the collocation point; a small intersection */
        /* is rejected if that point is too close to the texel boundary     */
  vec2 centre = CentralPoint ( cnt );
  vec2 rel = centre - vec2 ( float(x), float(y) );
  bool nearEdge = rel.x < MG || rel.x > 1.0-MG || rel.y < MG || rel.y > 1.0-MG;
  if ( area < EPS1 && nearEdge )
    return false;
  cpbuf.cp[texel].w = area;

        /* barycentric coordinates of the collocation point in the triangle */
  mat3 corners = mat3 ( vec3(txbuf.tt[vi[0]],1.0),
                        vec3(txbuf.tt[vi[1]],1.0),
                        vec3(txbuf.tt[vi[2]],1.0) );
  vec3 bc = inverse ( corners ) * vec3 ( centre, 1.0 );

        /* the same combination of the corresponding TSPointBuf vertices */
  vec3 p0 = vec3 ( tsbuf.pt[3*vi[0]], tsbuf.pt[3*vi[0]+1], tsbuf.pt[3*vi[0]+2] );
  vec3 p1 = vec3 ( tsbuf.pt[3*vi[1]], tsbuf.pt[3*vi[1]+1], tsbuf.pt[3*vi[1]+2] );
  vec3 p2 = vec3 ( tsbuf.pt[3*vi[2]], tsbuf.pt[3*vi[2]+1], tsbuf.pt[3*vi[2]+2] );
  cpbuf.cp[texel].xyz = bc[0]*p0 + bc[1]*p1 + bc[2]*p2;
  return true;
} /*ProcessTexel*/

/* Compare-and-swap step on VarBuf entries, keyed by their z field.               */
/* The invocation number x is split into a block number (x / (H/2)) and           */
/* a position within half a block (x % (H/2)), where H = ctl.H.  The first        */
/* element is at that position in the lower half of the block; its partner is     */
/* either the mirror position counted from the end of the block (ctl.reverse)     */
/* or the same position in the upper half.  Nothing is done when the partner      */
/* index is not less than ctl.N; otherwise the entries are exchanged if the first */
/* has the greater key.                                                           */
void CompSwap ( uint x )
{
  uint hh = ctl.H >> 1;
  uint pos = x % hh;
  uint grp = x / hh;
  uint lo = grp*ctl.H + pos;
  uint hi;

  if ( ctl.reverse )
    hi = (grp+1)*ctl.H - pos - 1;
  else
    hi = lo + hh;
  if ( hi >= ctl.N )
    return;
  if ( vbuf.tvar[lo].z > vbuf.tvar[hi].z ) {
    uvec4 tmp = vbuf.tvar[lo];
    vbuf.tvar[lo] = vbuf.tvar[hi];
    vbuf.tvar[hi] = tmp;
  }
} /*CompSwap*/

/* One step of summing the x fields of VarBuf entries, for s = ctl.step.          */
/* With blocks of 2^(s+1) consecutive entries, invocation i handles the block     */
/* number i / 2^s: the x field of the last entry of the lower half of that block  */
/* is added to the x field of the entry at position i % 2^s in the upper half,    */
/* provided that the index of the latter is less than ctl.N.                      */
void PrefixSum ( int i )
{
  uint twice = i+i;
  uint blk = 0x01 << ctl.step;
  uint low = blk-1;
  uint src = (twice & ~blk) | low;      /* last entry of the lower half */
  uint dst = src + (i & low) + 1;       /* target entry in the upper half */

  if ( dst >= ctl.N )
    return;
  vbuf.tvar[dst].x += vbuf.tvar[src].x;
} /*PrefixSum*/

/* Entry point; ctl.stage selects what the invocation does:                       */
/*   0 - reset one VarBuf entry and one VMap entry,                               */
/*   1 - process the texel (gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)     */
/*       and reset its VarBuf entry if ProcessTexel() rejects it,                 */
/*   2 - add to the x field of an entry the x field of the entry located          */
/*       ctl.N/2 (rounded up) positions further,                                  */
/*   3 - compare-and-swap step, see CompSwap(),                                   */
/*   4 - set the x field to 1 if the z field is greater than in the preceding     */
/*       entry, to 0 otherwise (and for the entry number 0),                      */
/*   5 - summation step, see PrefixSum(),                                         */
/*   6 - copy an ObjIdBuf entry, selected by the w field, to VOIdBuf.             */
/* Any other value of ctl.stage does nothing.                                     */
void main ( void )
{
  int gid = int(gl_GlobalInvocationID.x);
  int stage = ctl.stage;

  if ( stage == 0 ) {
    vbuf.tvar[gid] = uvec4(0,RESTART_IND_UINT,RESTART_IND_UINT,0);
    vmap.px[gid] = uvec2(RESTART_IND_UINT,RESTART_IND_UINT);
  }
  else if ( stage == 1 ) {
    int row = int(gl_GlobalInvocationID.y);
    bool accepted = ProcessTexel ( gid, row );
    if ( !accepted )
      vbuf.tvar[row*ctl.width + gid] = uvec4(0,RESTART_IND_UINT,RESTART_IND_UINT,0);
  }
  else if ( stage == 2 ) {
        /* the partner is N/2 positions further, one more if N is odd */
    int partner = gid + ctl.N/2 + (ctl.N & 0x00000001);
    vbuf.tvar[gid].x += vbuf.tvar[partner].x;
  }
  else if ( stage == 3 ) {
    CompSwap ( gl_GlobalInvocationID.x );
  }
  else if ( stage == 4 ) {
        /* the first entry has no predecessor to compare with */
    bool greater = gid != 0 && vbuf.tvar[gid].z > vbuf.tvar[gid-1].z;
    vbuf.tvar[gid].x = greater ? 1u : 0u;
  }
  else if ( stage == 5 ) {
    PrefixSum ( gid );
  }
  else if ( stage == 6 ) {
        /* for each element store the number of its object */
    uint xy = vbuf.tvar[gid].w;
    int  px = int(xy & 0xFFFF);
    int  py = int(xy >> 16);
    vobjid.id[gid] = objid.id[py*ctl.width + px];
  }
} /*main*/

