#version 450 core

/* Bit layout of the packed int records stored in the mvf arrays (vertex  */
/* and facet records alike): the low 25 bits (FHEMASK) are an index into  */
/* the corresponding half-edge index list, bit 25 (TAGMASK) is a flag set */
/* and cleared by TagVertex, and the top 6 bits (DEGMASK, extracted with  */
/* >> DEGSHIFT) hold the degree.                                          */
/* NOTE(review): the records are signed ints and the degree is extracted  */
/* with a plain >>, so a degree of 32 or more would come out negative -   */
/* confirm that such degrees never occur.                                 */
#define FHEMASK  0x01FFFFFF
#define TAGMASK  0x02000000
#define DEGMASK  0xFC000000
#define DEGSHIFT         26

/* Field names for the components of an ivec4 half-edge record.          */
/* V0 and V1 are vertex indices (presumably origin and destination),     */
/* FACN is a facet index, OTHE is the index of the paired half-edge;     */
/* the code treats OTHE < 0 as "no pair".                                */
#define V0   x
#define V1   y
#define FACN z
#define OTHE w

/* Work groups hold a single invocation, so gl_GlobalInvocationID.x      */
/* (the element index used in main) equals the work group index.         */
layout (local_size_x=1) in;

/* Scratch integer buffer; the macros ecn/vcn/efn/wlf/fcn and            */
/* nvi/fvnum/nhei/nfi/fvd address sub-arrays of it, and the prefix-sum   */
/* stages operate on the range starting at prN0.                         */
layout (std430,binding=0) buffer prSequence { int   a[];    } seq;

/* Input mesh: packed vertex/facet records and half-edge index lists     */
/* (mvf), half-edge records (mhe) and vertex attributes (vc, nsattr      */
/* floats per vertex).                                                   */
layout (std430,binding=1) buffer Inmvf      { int    mvf[]; } inmvf;
layout (std430,binding=2) buffer Inmhe      { ivec4  mhe[]; } inmhe;
layout (std430,binding=3) buffer Invc       { float  vc[];  } inmvc;

/* Output mesh, same three-buffer organisation as the input mesh.        */
layout (std430,binding=4) buffer Outmvf     { int    mvf[]; } outmvf;
layout (std430,binding=5) buffer Outmhe     { ivec4  mhe[]; } outmhe;
layout (std430,binding=6) buffer Outvc      { float  vc[];  } outmvc;

/* Per-dispatch parameters.                                              */
/*   stage              - selects the routine executed by main           */
/*   nsattr             - number of floats per vertex in the vc arrays   */
/*   inv, inhe, infac   - sizes of the input vertex, half-edge and facet */
/*                        arrays (used as offsets into inmvf.mvf)        */
/*   outnv, outnhe, outnfac - the same for the output mesh               */
/*   fvf, fvhe, maxonv  - base indices used by the doubling stages       */
/*                        (fvf: first facet created for a vertex, fvhe:  */
/*                        base of extra half-edges, maxonv: base of the  */
/*                        doubling arrays in seq) - presumably; confirm  */
/*                        against the host code                          */
/*   invb, inei         - not referenced in this shader                  */
/*   prN0, prN, prStep  - start, length and step number for the          */
/*                        prefix-sum stages 0 and 1                      */
uniform RefineBlock {
    int  stage;
    int  nsattr, inv, inhe, infac, outnv, outnhe, outnfac;
    int  invb, inei, fvf, maxonv, fvhe;
    uint prN0, prN, prStep;
  };

/* Accessors for the input (im*) and output (om*) mesh.  Each mvf buffer */
/* is a concatenation of four int arrays, in this order:                 */
/*   mv    - packed vertex records             (inv / outnv entries)     */
/*   mfac  - packed facet records              (infac / outnfac entries) */
/*   mvhei - half-edge indices listed by vertex (inhe / outnhe entries)  */
/*   mfhei - half-edge indices listed by facet                           */
/* mhe gives the ivec4 half-edge records and mvc the vertex attributes.  */
#define imv(I)    inmvf.mvf[I]
#define imfac(I)  inmvf.mvf[inv+(I)]
#define imvhei(I) inmvf.mvf[inv+infac+(I)]
#define imfhei(I) inmvf.mvf[inv+infac+inhe+(I)]
#define imhe(I)   inmhe.mhe[I]
#define imvc(I)   inmvc.vc[I]
#define omv(I)    outmvf.mvf[I]
#define omfac(I)  outmvf.mvf[outnv+(I)]
#define omvhei(I) outmvf.mvf[outnv+outnfac+(I)]
#define omfhei(I) outmvf.mvf[outnv+outnfac+outnhe+(I)]
#define omhe(I)   outmhe.mhe[I]
#define omvc(I)   outmvc.vc[I]

/* doubling arrays in seq buffer */
/* All five start at or after offset maxonv; the range below maxonv is   */
/* addressed directly as seq.a[i] by several doubling stages.            */
/*   ecn - per input half-edge, inhe+1 entries (written by DSetECN)      */
/*   vcn - per input vertex, inv+1 entries (written by DSetVCN)          */
/*   efn - per input half-edge (written by DSetEFN1/DSetEFN2)            */
/*   wlf - per input half-edge: its position in its facet (DSetWLF)      */
/*   fcn - per input facet (written by DSetIFDeg)                        */
#define ecn(I)    seq.a[maxonv+(I)]
#define vcn(I)    seq.a[maxonv+inhe+1+(I)]
#define efn(I)    seq.a[maxonv+inhe+inv+2+(I)]
#define wlf(I)    seq.a[maxonv+2*inhe+inv+3+(I)]
#define fcn(I)    seq.a[maxonv+3*inhe+inv+3+(I)]
/* averaging arrays in seq buffer */
/* These start at offset 0 of seq.a and therefore share storage with the */
/* doubling data; presumably the two groups are never live together.     */
/*   nvi, fvnum - per input facet                                        */
/*   nhei       - per input half-edge                                    */
/*   nfi        - per input vertex                                       */
/*   fvd        - indexed by input facet and by output facet             */
#define nvi(I)    seq.a[I]
#define fvnum(I)  seq.a[infac+(I)]
#define nhei(I)   seq.a[2*infac+(I)]
#define nfi(I)    seq.a[2*infac+inhe+(I)]
#define fvd(I)    seq.a[2*infac+inhe+inv+(I)]
/* Yields the half-edge index stored one position before position en in  */
/* the half-edge list of input facet fn, wrapping from position 0 to the */
/* last position (degree-1) of that facet.                               */
#define PREVIFAC_HEDGE(fn,en) \
  ((en) > 0 ? \
    imfhei((imfac(fn) & FHEMASK) + (en) - 1) :\
    imfhei((imfac(fn) & FHEMASK) + (imfac(fn) >> DEGSHIFT) - 1))

/* Stage 0: one step (number prStep) of the in-place prefix sum over the  */
/* prN integers that start at seq.a[prN0].  The elements are viewed in    */
/* groups of 2^(prStep+1); invocation i adds the last element of the      */
/* lower half of its group to one element of the upper half, provided     */
/* the target index is below prN.                                         */
void iPrefixSum ( uint i )
{
  uint half = 0x01 << prStep;       /* size of half a group             */
  uint low  = half - 1;             /* mask of the bits below that      */
  uint src  = ((i+i) & ~half) | low;
  uint dst  = src + (i & low) + 1;

  if ( dst < prN )
    seq.a[prN0 + dst] += seq.a[prN0 + src];
} /*iPrefixSum*/

/* Stage 1: adds to element I of the range starting at seq.a[prN0] the   */
/* element located (prN+1)/2 positions further on.  The expansion ends   */
/* with its own semicolon.                                               */
#define AddTwoTerms(I) seq.a[prN0+(I)] += seq.a[prN0+(I)+(prN+1)/2];

/* Stage 2: sets or clears the tag bit of input vertex i and records the  */
/* result (1 or 0) in seq.a[i].  A vertex is tagged when the last         */
/* half-edge in its half-edge list has no pair (OTHE < 0).                */
void TagVertex ( uint i )
{
  int  rec    = imv(i);
  int  last   = (rec & FHEMASK) + (rec >> DEGSHIFT) - 1;
  bool tagged = imhe(imvhei(last)).OTHE < 0;

  if ( tagged )
    imv(i) |= TAGMASK;
  else
    imv(i) &= ~TAGMASK;
  seq.a[i] = tagged ? 1 : 0;
} /*TagVertex*/

/*#define DSetECN(i) ecn(i+1) = imhe(i).OTHE < 0 ? 6 : 4;*/
/* Stage 3: stores in ecn(i+1) the count associated with input half-edge  */
/* i - 6 if it has no pair (OTHE < 0), otherwise 4 - and sets ecn(0) to   */
/* 0 (presumably in preparation for a prefix sum).                        */
void DSetECN ( uint i )
{
  if ( i == 0 )
    ecn(0) = 0;
  if ( imhe(i).OTHE < 0 )
    ecn(i+1) = 6;
  else
    ecn(i+1) = 4;
} /*DSetECN*/

/* Stage 4: stores in vcn(i+1) the number of output vertices produced by  */
/* input vertex i - its degree, plus 2 if the vertex is tagged - and sets */
/* vcn(0) to 0.                                                           */
void DSetVCN ( uint i )
{
  int rec = imv(i);
  int cnt = rec >> DEGSHIFT;

  if ( (rec & TAGMASK) != 0 )
    cnt += 2;
  if ( i == 0 )
    vcn(0) = 0;
  vcn(i+1) = cnt;
} /*DSetVCN*/

/* Stage 5: replicates input vertex i into the output mesh.  Starting at  */
/* output index vcn(i) it writes degree (+2 if tagged) copies of the      */
/* nsattr attributes and initialises each output vertex record to degree  */
/* 4 with a zero half-edge index; for a tagged vertex the first and last  */
/* copy get degree 2 instead.                                             */
void DCopyVC ( uint i )
{
  int  rec    = imv(i);
  bool tagged = (rec & TAGMASK) != 0;
  int  ncopy  = (rec >> DEGSHIFT) + (tagged ? 2 : 0);
  int  first  = vcn(i);
  int  c, a;

  for ( c = 0; c < ncopy; c++ ) {
    for ( a = 0; a < nsattr; a++ )
      omvc((first+c)*nsattr+a) = imvc(i*nsattr+a);
    omv(first+c) = 4 << DEGSHIFT;
  }
  if ( tagged ) {
    omv(first+ncopy-1) = 2 << DEGSHIFT;
    omv(first)         = 2 << DEGSHIFT;
  }
} /*DCopyVC*/

/*#define DSetOVdeg(i) seq.a[i+1] = omv(i) >> DEGSHIFT;*/
/* Stage 6: copies the degree of output vertex i to seq.a[i+1] and sets   */
/* seq.a[0] to 0.                                                         */
void DSetOVdeg ( uint i )
{
  if ( i == 0 )
    seq.a[0] = 0;
  seq.a[i+1] = omv(i) >> DEGSHIFT;
} /*DSetOVdeg*/

/* Stage 7: ORs seq.a[i] into output vertex record i (presumably the     */
/* prefix-summed degrees, giving the index of the vertex's first entry   */
/* in the half-edge list).  The expansion ends with its own semicolon.   */
#define DSetOVfhe(i) omv(i) |= seq.a[i];

/* Stage 8: for every half-edge in the list of input facet i, stores in   */
/* wlf its position (0 .. degree-1) within that list.                     */
void DSetWLF ( uint i )
{
  int rec  = imfac(i);
  int n    = rec >> DEGSHIFT;
  int base = rec & FHEMASK;
  int pos;

  for ( pos = 0; pos < n; pos++ )
    wlf(imfhei(base+pos)) = pos;
} /*DSetWLF*/

/* Stage 9: initialises efn.  efn(0) receives infac; for i > 0, efn(i) is */
/* 1 when half-edge i-1 has no pair or its pair has an index of at least  */
/* i, and 0 otherwise.                                                    */
void DSetEFN1 ( uint i )
{
  int other;

  if ( i == 0 ) {
    efn(0) = infac;
    return;
  }
  other = imhe(i-1).OTHE;
  if ( other < 0 || other >= i )
    efn(i) = 1;
  else
    efn(i) = 0;
} /*DSetEFN1*/

/* Stage 10: a half-edge whose pair has a smaller index takes over the    */
/* efn value of that pair; all other entries are left untouched.          */
void DSetEFN2 ( uint i )
{
  int other = imhe(i).OTHE;

  if ( other < 0 )
    return;
  if ( other < i )
    efn(i) = efn(other);
} /*DSetEFN2*/

/* Stage 11: writes output facet record infac+i as a facet of degree 4    */
/* whose half-edge list starts right after the list of the last input     */
/* facet, at a further offset of 4*i.                                     */
void DSetOMfac1 ( uint i )
{
  int last = imfac(infac-1);
  int next = (last & FHEMASK) + (last >> DEGSHIFT);

  omfac(infac+i) = (4 << DEGSHIFT) + next + 4*int(i);
} /*DSetOMfac1*/

/* Stage 12: writes the degree of output facet fvf+i - the degree of      */
/* input vertex i, plus 2 if it is tagged - into the facet record (with   */
/* a zero half-edge index) and into seq.a[i+1]; seq.a[0] is set to 4.     */
void DSetOMfac2 ( uint i )
{
  int rec = imv(i);
  int n   = rec >> DEGSHIFT;

  if ( (rec & TAGMASK) != 0 )
    n += 2;
  if ( i == 0 )
    seq.a[0] = 4;
  seq.a[i+1] = n;
  omfac(fvf+i) = n << DEGSHIFT;
} /*DSetOMfac2*/

/* Stage 13: adds to output facet record fvf+i the half-edge index of    */
/* facet fvf-1 plus seq.a[i].  The expansion ends with its own           */
/* semicolon.                                                            */
#define DSetOMfac3(i) omfac(fvf+i) += (omfac(fvf-1) & FHEMASK) + seq.a[i];

/* Stage 14: sets seq.a[i] to 1 when half-edge i has no pair or its pair  */
/* has a larger index, and to 0 otherwise.                                */
void DBindNewhe1 ( uint i )
{
  int other = imhe(i).OTHE;

  if ( other < 0 || other > i )
    seq.a[i] = 1;
  else
    seq.a[i] = 0;
} /*DBindNewhe1*/

/* Stage 15: links the output half-edges created for input half-edge i,   */
/* which start at index ecn(i).  Half-edges +0/+1 and +2/+3 are paired    */
/* with each other; +0 inherits the facet of the input half-edge and +3   */
/* gets facet fvf + V0.  When the input half-edge has no pair, or is the  */
/* lower-indexed one of its pair, +1 and +2 get facet                     */
/* infac + seq.a[i] - 1; an unpaired input half-edge also gets the two    */
/* extra half-edges +4 and +5, with no pair and the same facet.           */
void DBindNewhe2 ( uint i )
{
  int  other  = imhe(i).OTHE;
  int  e      = ecn(i);
  bool border = other < 0;
  int  ef;

  omhe(e).OTHE   = e+1;
  omhe(e+1).OTHE = e;
  omhe(e+2).OTHE = e+3;
  omhe(e+3).OTHE = e+2;
  omhe(e).FACN   = imhe(i).FACN;
  omhe(e+3).FACN = fvf + imhe(i).V0;

  if ( border || i < other ) {
    ef = infac + seq.a[i] - 1;
    if ( border ) {
      omhe(e+5).OTHE = -1;
      omhe(e+4).OTHE = -1;
      omhe(e+5).FACN = ef;
      omhe(e+4).FACN = ef;
    }
    omhe(e+2).FACN = ef;
    omhe(e+1).FACN = ef;
  }
} /*DBindNewhe2*/

/* Stage 16: for an input half-edge whose pair j has a smaller index,     */
/* copies to its output half-edges +1 and +2 the facet of the half-edge   */
/* paired with output half-edge ecn(j).                                   */
void DBindNewhe3 ( uint i )
{
  int other = imhe(i).OTHE;
  int e, fac;

  if ( other < 0 || other >= i )
    return;
  e   = ecn(i);
  fac = omhe(omhe(ecn(other)).OTHE).FACN;
  omhe(e+2).FACN = fac;
  omhe(e+1).FACN = fac;
} /*DBindNewhe3*/

/* Stage 17: fcn(0) = 0 and, for i > 0, fcn(i) = degree of input facet   */
/* i-1.  The expansion ends with its own semicolon.                      */
#define DSetIFDeg(i) fcn(i) = i == 0 ? 0 : imfac(i-1) >> DEGSHIFT;

/* Stage 18: fills the half-edge list of output facet i, which occupies   */
/* the same positions as the list of input facet i.  Each input           */
/* half-edge index is replaced by ecn of that half-edge, i.e. the first   */
/* output half-edge created for it.                                       */
/* The previous version also read fcn(i) into a local that was never      */
/* used; that dead buffer read has been removed.                          */
void DSetOMfhei1 ( uint i )
{
  int deg, imfh, j;

  deg = imfac(i) >> DEGSHIFT;
  imfh = imfac(i) & FHEMASK;
  for ( j = 0; j < deg; j++ )
    omfhei(imfh+j) = ecn(imfhei(imfh+j));
} /*DSetOMfhei1*/

/* Stage 19: enters output half-edges of input half-edge i into the       */
/* half-edge list of output facet efn(i).  An unpaired half-edge fills    */
/* all four slots (with +1, +2, +4, +5); otherwise the member of the pair */
/* with the smaller index fills slots 0 and 1 and the other member fills  */
/* slots 2 and 3 (each with +1, +2).                                      */
void DSetOMfhei2 ( uint i )
{
  int other = imhe(i).OTHE;
  int slot  = omfac(efn(i)) & FHEMASK;
  int e     = ecn(i);

  if ( other < 0 ) {
    omfhei(slot)   = e + 1;
    omfhei(slot+1) = e + 2;
    omfhei(slot+2) = e + 4;
    omfhei(slot+3) = e + 5;
    return;
  }
  if ( !(other > i) )
    slot += 2;
  omfhei(slot)   = e + 1;
  omfhei(slot+1) = e + 2;
} /*DSetOMfhei2*/

/* Stage 20: seq.a[0] = 0 and, for i > 0, seq.a[i] = 1 if input vertex   */
/* i-1 is tagged, else 0.  The expansion ends with its own semicolon.    */
#define DSetTgv(i) seq.a[i] = i == 0 ? 0 : ((imv(i-1) & TAGMASK) != 0 ? 1 : 0);

/* Stage 21: completes the output half-edge structure around the output   */
/* vertices created from input vertex i (output indices vcn(i) onwards)   */
/* and fills the half-edge list of the facet fvf+i created for that       */
/* vertex.  It sets V0/V1 of the output half-edges derived from the       */
/* half-edges incident with vertex i and the entries of the output vertex */
/* half-edge lists.  Tagged vertices are handled in the first branch,     */
/* which additionally sets up the two extra half-edges q and p = q+1      */
/* taken from the range starting at fvhe.                                 */
/* NOTE(review): seq.a[i] is expected to hold the value left by stage 20  */
/* (DSetTgv), presumably after a prefix sum - i.e. the number of tagged   */
/* vertices preceding i; confirm against the host code.                   */
void DSetOMfhei3 ( uint i )
{
  int d, j, v0, v1, l, f, ecnl, p, q, k;

  d = imv(i) >> DEGSHIFT;
  j = imv(i) & FHEMASK;
  if ( (imv(i) & TAGMASK) != 0 ) {
    /* tagged vertex: d+2 output vertices, vcn(i) .. vcn(i)+d+1 */
    v0 = vcn(i);
    /* l: the half-edge preceding the vertex's first half-edge in that */
    /* half-edge's facet */
    l = imvhei(j);
    f = imhe(l).FACN;
    l = PREVIFAC_HEDGE ( f, wlf(l) );
    ecnl = ecn(l);
    omhe(ecnl+4).V1 = v0;
    /* two extra half-edges for this vertex: q (no pair) and p = q+1, */
    /* the latter paired with ecnl+5 */
    q = fvhe + 2*seq.a[i];
    omhe(ecnl+5).OTHE = p = q + 1;
    omhe(p).OTHE = ecnl+5;
    omhe(ecnl+5).V0 = omhe(p).V1 = v0;
    omhe(ecnl+5).V1 = omhe(p).V0 = v0+1;
    omvhei(omv(v0) & FHEMASK) = ecnl+5;
    omhe(q).OTHE = -1;
    omhe(q).V0 = v0;
    omhe(q).V1 = v0+d+1;
    omvhei((omv(v0) & FHEMASK)+1) = q;
    omhe(q).FACN = omhe(p).FACN = fvf+int(i);
    /* q takes slot 0 of the new facet's half-edge list */
    omfhei(omfac(fvf+i) & FHEMASK) = q;
    for ( k = 0; k < d; k++ ) {
      /* output vertex for the k-th half-edge of the input vertex */
      v0 = vcn(i) + k + 1;
      l = imvhei((imv(i) & FHEMASK) + k);
      f = imhe(l).FACN;
      ecnl = ecn(l);
      /* p is the facet half-edge carried over from the previous step */
      omvhei(omv(v0) & FHEMASK) = p;
      omfhei((omfac(fvf+i) & FHEMASK)+d+1-k) = p;
      omhe(ecnl).V0 = omhe(ecnl+1).V1 = v0;
      omhe(omhe(ecnl).OTHE).V1 = v0;
      omhe(ecnl+2).V0 = omhe(ecnl+3).V1 = v0;
      omhe(ecnl+2).V1 = omhe(ecnl+3).V0 = v0+1;
      omvhei((omv(v0) & FHEMASK)+2) = ecnl;
      omvhei((omv(v0) & FHEMASK)+3) = ecnl+2;
      p = ecnl+3;
      /* switch to the preceding half-edge in the same facet */
      l = PREVIFAC_HEDGE ( f, wlf(l) );
      ecnl = ecn(l);
      omhe(ecnl).V1 = v0;
      omhe(omhe(ecnl).OTHE).V0 = v0;
      omvhei((omv(v0) & FHEMASK)+1) = ecnl+1;
    }
    /* the half-edge left over from the last step takes slot 1 */
    omfhei((omfac(fvf+i) & FHEMASK)+1) = p;
    /* last output vertex, vcn(i)+d+1, handled via the vertex's last */
    /* half-edge */
    l = imvhei((imv(i) & FHEMASK)+d-1);
    ecnl = ecn(l);
    omhe(ecnl+4).V0 = vcn(i)+d+1;
    omvhei(omv(vcn(i)+d+1) & FHEMASK) = ecnl+3;
    omvhei((omv(vcn(i)+d+1) & FHEMASK)+1) = ecnl + 4;
  }
  else {
    /* untagged vertex: d output vertices forming a closed cycle */
    for ( k = 0; k < d; k++ ) {
      v0 = vcn(i) + k;
      /* successor of v0 in the cycle, wrapping to the first copy */
      v1 = ( k < d-1) ? v0+1 : vcn(i);
      l = imvhei((imv(i) & FHEMASK)+k);
      f = imhe(l).FACN;
      ecnl = ecn(l);
      omhe(ecnl).V0 = v0;
      omhe(ecnl+2).V0 = omhe(ecnl+3).V1 = v0;
      omhe(ecnl+2).V1 = omhe(ecnl+3).V0 = v1;
      omhe(omhe(ecnl).OTHE).V1 = v0;
      omvhei(omv(v0) & FHEMASK) = ecnl;
      omvhei((omv(v0) & FHEMASK)+1) = ecnl+2;
      omvhei((omv(v1) & FHEMASK)+2) = ecnl+3;
      /* the new facet's half-edge list is filled in reverse order */
      omfhei((omfac(fvf+i) & FHEMASK)+d-1-k) = ecnl+3;
      /* switch to the preceding half-edge in the same facet */
      l = PREVIFAC_HEDGE ( f, wlf(l) );
      ecnl = ecn(l);
      omhe(ecnl).V1 = v0;
      omhe(omhe(ecnl).OTHE).V0 = v0;
      omvhei((omv(v0) & FHEMASK)+3) = omhe(ecnl).OTHE;
    }
  }
} /*DSetOMfhei3*/

/* Stages 22-24: initialise the averaging index arrays to the identity   */
/* mapping (nvi, nhei, nfi) and fvnum to 1.  ASetNhei1 and ASetNfi1 end  */
/* with their own semicolon; ASetNvi1 expands to a braced block.         */
#define ASetNvi1(i) { nvi(i) = int(i); fvnum(i) = 1; }
#define ASetNhei1(i) nhei(i) = int(i);
#define ASetNfi1(i) nfi(i) = int(i);

/* Stages 25 and 28: counts the output vertices generated by input facet  */
/* i.  If none of the facet's half-edges starts at a tagged vertex the    */
/* count is 1; otherwise the facet boundary is traversed once, starting   */
/* at the first half-edge with a tagged origin, and the count is the      */
/* number of transitions from a tagged to an untagged vertex.             */
/*   first == true  - stores the count in fvnum(i) and fvd(i), and in     */
/*                    nvi(i+1) unless i is the last facet; nvi(0) = 0.    */
/*   first == false - stores -1 in nvi(i) if the count is 0.              */
void ASetNvi2 ( uint i, bool first )
{
  int  deg   = imfac(i) >> DEGSHIFT;
  int  base  = imfac(i) & FHEMASK;
  int  pos, step, cnt;
  bool found = false, prevTag, curTag;

  /* look for the first half-edge whose origin is tagged */
  for ( pos = 0; pos < deg; pos++ )
    if ( (imv(imhe(imfhei(base+pos)).V0) & TAGMASK) != 0 ) {
      found = true;
      break;
    }

  if ( !found )
    cnt = 1;
  else {
    /* walk once around the facet, starting at a tagged origin */
    cnt = 0;
    prevTag = true;
    for ( step = 0; step < deg; step++ ) {
      curTag = (imv(imhe(imfhei(base+pos)).V1) & TAGMASK) != 0;
      if ( prevTag && !curTag )
        cnt++;
      prevTag = curTag;
      pos = (pos >= deg-1) ? 0 : pos+1;
    }
  }

  if ( !first ) {
    if ( cnt == 0 )
      nvi(i) = -1;
    return;
  }
  fvd(i)   = cnt;
  fvnum(i) = cnt;
  if ( i == 0 )
    nvi(0) = 0;
  if ( i < infac-1 )
    nvi(i+1) = cnt;
} /*ASetNvi2*/

/* Stage 26: nfi(0) = 0 and, for i > 0, nfi(i) = 1 if input vertex i-1    */
/* is not tagged, else 0.                                                 */
void ASetNfi2 ( uint i )
{
  int flag = 0;

  if ( i != 0 && (imv(i-1) & TAGMASK) == 0 )
    flag = 1;
  nfi(i) = flag;
} /*ASetNfi2*/

/* Stage 27: nhei(0) = 0 and, for i > 0, nhei(i) = 1 if the V1 vertex of  */
/* input half-edge i-1 is not tagged, else 0.                             */
void ASetNhei2 ( uint i )
{
  int flag = 0;

  if ( i != 0 && (imv(imhe(i-1).V1) & TAGMASK) == 0 )
    flag = 1;
  nhei(i) = flag;
} /*ASetNhei2*/

/* Stage 29: marks half-edge i with nhei(i) = -1 if its V1 vertex is     */
/* tagged.                                                               */
#define ASetNhei3(i) { if ( (imv(imhe(i).V1) & TAGMASK) != 0 ) nhei(i) = -1; }
/* Stage 30: marks vertex i with nfi(i) = -1 if it is tagged.            */
#define ASetNfi3(i) { if ( (imv(i) & TAGMASK) != 0 ) nfi(i) = -1; }
/* Stages 31 and 34: zero entry i of fvd.  The expansion ends with its   */
/* own semicolon.                                                        */
#define AClearFVd(i) fvd(i) = 0;

/* Stage 32: if input vertex i maps to an output facet (nfi(i) >= 0),     */
/* stores the degree of the vertex in fvd at that facet's index.          */
void ASetFVd1 ( uint i )
{
  int of = nfi(i);

  if ( of < 0 )
    return;
  fvd(of) = imv(i) >> DEGSHIFT;
} /*ASetFVd1*/

/* Stage 33: if input vertex i maps to an output facet k = nfi(i) >= 0,   */
/* writes that facet's record: the degree bits of the vertex combined     */
/* with the half-edge index fvd(k-1), or 0 for k == 0.                    */
void ASetFVd2 ( uint i )
{
  int of = nfi(i);
  int start = 0;

  if ( of >= 0 ) {
    if ( of > 0 )
      start = fvd(of-1);
    omfac(of) = (imv(i) & DEGMASK) | start;
  }
} /*ASetFVd2*/

/* Stages 35 and 36: processes the fvnum(i) output vertices generated by  */
/* input facet i, numbered from nvi(i).                                   */
/*   first == true  - adds the degrees of these vertices to fvd(i).       */
/*   first == false - writes the output vertex records and their          */
/*                    half-edge lists, and sets V0 and FACN of the        */
/*                    output half-edges found through nhei.               */
/* NOTE(review): in the second pass fvd(i-1) is used as the start index   */
/* of the facet's vertices in the output half-edge list, so fvd is        */
/* presumably prefix-summed between the two passes - confirm against the  */
/* host code.                                                             */
void ASetOMVert ( uint i, bool first )
{
  int n, d, fhe, r, s, t, j, k, l, v0, v1, m, e;

  if ( (r = fvnum(i)) > 0 ) {
    n = nvi(i);
    d = imfac(i) >> DEGSHIFT;
    fhe = imfac(i) & FHEMASK;
    /* k: position of the first half-edge ending at a tagged vertex, */
    /* or d if there is none */
    for ( k = 0; k < d; k++ ) {
      v1 = imhe(imfhei(fhe+k)).V1;
      if ( (imv(v1) & TAGMASK) != 0 )
        break;
    }
    /* j is only needed (and only initialised) in the second pass */
    if ( !first )
      j = i > 0 ? fvd(i-1) : 0;
    for ( s = 0;  s < r;  s++, n++ ) {
      /* advance cyclically to the next half-edge ending at an */
      /* untagged vertex */
      do {
        k = k >= d-1 ? 0 : k+1;
        v1 = imhe(imfhei(fhe+k)).V1;
      } while ( (imv(v1) & TAGMASK) != 0 );
      /* m: number of consecutive half-edges after position k whose */
      /* origin is untagged (at most d); used as the vertex degree */
      for ( m = 0, t = (k+1) % d;  m < d;  m++, t = (t+1) % d ) {
        v0 = imhe(imfhei(fhe+t)).V0;
        if ( (imv(v0) & TAGMASK) != 0 )
          break;
      }
      if ( first )
        fvd(i) += m;
      else {
        omv(n) = (m << DEGSHIFT) | j;
        /* the vertex's half-edge list is filled from its last slot */
        /* down to its first */
        for ( l = m-1, t = k;  l >= 0;  l--, t = (t+1) % d ) {
          v1 = imhe(imfhei(fhe+t)).V1;
          omvhei(j+l) = e = nhei(imfhei(fhe+t));
          omhe(e).V0 = n;
          omhe(e).FACN = nfi(v1);
        }
        j += m;
      }
    }
  }
} /*ASetOMVert*/

/* Stage 37: if input half-edge i maps to an output half-edge             */
/* (nhei(i) >= 0), pairs that output half-edge with the image of the      */
/* input half-edge's pair.                                                */
void ABindHe ( uint i )
{
  int oh = nhei(i);

  if ( oh < 0 )
    return;
  omhe(oh).OTHE = nhei(imhe(i).OTHE);
} /*ABindHe*/

/* Stage 38: if input vertex i maps to an output facet (nfi(i) >= 0),     */
/* fills that facet's half-edge list with the images of the pairs of the  */
/* vertex's half-edges, taken in reverse order, and then sets V1 of each  */
/* listed half-edge to V0 of its cyclic successor in the list.            */
void ASetOMfacHe ( uint i )
{
  int of = nfi(i);
  int deg, vbase, fbase, pos, nxt;

  if ( of < 0 )
    return;
  deg   = imv(i) >> DEGSHIFT;
  vbase = imv(i) & FHEMASK;
  fbase = omfac(of) & FHEMASK;

  for ( pos = 0; pos < deg; pos++ )
    omfhei(fbase+pos) = nhei(imhe(imvhei(vbase+deg-1-pos)).OTHE);

  /* V0 fields are not modified here, so the order of this loop is free */
  for ( pos = 0; pos < deg; pos++ ) {
    nxt = pos+1 < deg ? pos+1 : 0;
    omhe(omfhei(fbase+pos)).V1 = omhe(omfhei(fbase+nxt)).V0;
  }
} /*ASetOMfacHe*/

/* Stage 39: computes the attributes of the output vertices generated by  */
/* input facet i.  The nsattr attributes of the facet's vertices (the V0  */
/* ends of its half-edges) are summed and scaled by 1/degree, the result  */
/* is stored at output vertex nvi(i) and then replicated into the         */
/* following fvnum(i)-1 output vertices.                                  */
void Average ( uint i )
{
  int   ncopies = fvnum(i);
  int   dst, deg, base, src, c, a, extra;
  float scale;

  if ( ncopies <= 0 )
    return;

  dst  = nvi(i)*nsattr;
  deg  = imfac(i) >> DEGSHIFT;
  base = imfac(i) & FHEMASK;

  /* start the sum with the first vertex of the facet ... */
  src = imhe(imfhei(base)).V0*nsattr;
  for ( a = 0; a < nsattr; a++ )
    omvc(dst+a) = imvc(src+a);
  /* ... and add the remaining ones */
  for ( c = 1; c < deg; c++ ) {
    src = imhe(imfhei(base+c)).V0*nsattr;
    for ( a = 0; a < nsattr; a++ )
      omvc(dst+a) += imvc(src+a);
  }
  scale = 1.0/float(deg);
  for ( a = 0; a < nsattr; a++ )
    omvc(dst+a) *= scale;

  /* forward copy: each attribute block is duplicated into the next one */
  extra = (ncopies-1)*nsattr;
  for ( a = 0; a < extra; a++ )
    omvc(dst+nsattr+a) = omvc(dst+a);
} /*Average*/

/* Entry point.  Every invocation handles the single element whose index  */
/* is its global invocation id; the uniform stage selects which routine   */
/* (function or macro) is applied to it.  Unknown stage numbers do        */
/* nothing.                                                               */
void main ( void )
{
  uint idx = gl_GlobalInvocationID.x;

  switch ( stage ) {
      /* prefix sum and vertex tagging */
    case  0:  iPrefixSum ( idx );          break;
    case  1:  AddTwoTerms ( idx );         break;
    case  2:  TagVertex ( idx );           break;
      /* doubling stages */
    case  3:  DSetECN ( idx );             break;
    case  4:  DSetVCN ( idx );             break;
    case  5:  DCopyVC ( idx );             break;
    case  6:  DSetOVdeg ( idx );           break;
    case  7:  DSetOVfhe ( idx );           break;
    case  8:  DSetWLF ( idx );             break;
    case  9:  DSetEFN1 ( idx );            break;
    case 10:  DSetEFN2 ( idx );            break;
    case 11:  DSetOMfac1 ( idx );          break;
    case 12:  DSetOMfac2 ( idx );          break;
    case 13:  DSetOMfac3 ( idx );          break;
    case 14:  DBindNewhe1 ( idx );         break;
    case 15:  DBindNewhe2 ( idx );         break;
    case 16:  DBindNewhe3 ( idx );         break;
    case 17:  DSetIFDeg ( idx );           break;
    case 18:  DSetOMfhei1 ( idx );         break;
    case 19:  DSetOMfhei2 ( idx );         break;
    case 20:  DSetTgv ( idx );             break;
    case 21:  DSetOMfhei3 ( idx );         break;
      /* averaging stages */
    case 22:  ASetNvi1 ( idx );            break;
    case 23:  ASetNhei1 ( idx );           break;
    case 24:  ASetNfi1 ( idx );            break;
    case 25:  ASetNvi2 ( idx, true );      break;
    case 26:  ASetNfi2 ( idx );            break;
    case 27:  ASetNhei2 ( idx );           break;
    case 28:  ASetNvi2 ( idx, false );     break;
    case 29:  ASetNhei3 ( idx );           break;
    case 30:  ASetNfi3 ( idx );            break;
    case 31:  AClearFVd ( idx );           break;
    case 32:  ASetFVd1 ( idx );            break;
    case 33:  ASetFVd2 ( idx );            break;
    case 34:  AClearFVd ( idx );           break;
    case 35:  ASetOMVert ( idx, true );    break;
    case 36:  ASetOMVert ( idx, false );   break;
    case 37:  ABindHe ( idx );             break;
    case 38:  ASetOMfacHe ( idx );         break;
    case 39:  Average ( idx );             break;
    default:  break;
  }
} /*main*/

