The Allegro Wiki is migrating to github at https://github.com/liballeg/allegro_wiki/wiki

GradientFill

From Allegro Wiki
Revision as of 15:13, October 3, 2008 by Matt Smith (talk | contribs) (Fixed Point for R,G & B deltas)
Jump to: navigation, search
This article is incomplete and needs some revision or has some pending TODOs. Please help Allegro by editing and finishing it. When the article becomes complete, you may remove this tag.


Diffuse Gradient RectFill

Functions to draw a rectangle on a bitmap with an error-diffused gradient fill.

There are two functions:

<highlightSyntax language="cpp">void rectfill_gradient(BITMAP *bmp, int x1, int y1, int x2, int y2, int c1, int c2, int c3, int c4)

</highlightSyntax>

Fills without error diffusion. This gives rise to banding which is terrible in 8 bit, and usually visible in 16 bit. It can even be discernable with 24 bit (try 4 close shades of light blue)

<highlightSyntax language="cpp">void rectfill_gradient_diffuse(BITMAP *bmp, int x1, int y1, int x2, int y2, int c1, int c2, int c3, int c4)

</highlightSyntax>

Fills with error diffusion.

  The parameters are  
  • bmp Bitmap to draw to
  • x1,y1 Left, Top
  • x2,y2 Right, Bottom
  • c1,c2 Color Top Left, Top Right
  • c3,c4 Color Bottom Left, Bottom Right

Simple C Version

This version uses the standard user functions so works on any platform in any colour depth. If you are using 8 bit then it will be wise to select a palette with decent coverage and call create_rgb_table() beforehand.


<highlightSyntax language="cpp">int rectfill_gradient(BITMAP * bmp, int x1, int y1, int x2, int y2,

              int color1,int color2,int color3,int color4 )

{ float c1_r = getr(color1); float c1_g = getg(color1); float c1_b = getb(color1);

float c2_r = getr(color2); float c2_g = getg(color2); float c2_b = getb(color2);

float c3_r = getr(color3); float c3_g = getg(color3); float c3_b = getb(color3);

float c4_r = getr(color4); float c4_g = getg(color4); float c4_b = getb(color4);

float dx_r,dx_g,dx_b; float dy1_r,dy1_g,dy1_b; float dy2_r,dy2_g,dy2_b;

float height = y2- y1, width = x2-x1;

int x,y;

float r,g,b;


  dy1_r =  (c3_r-c1_r)/height;
  dy1_g =  (c3_g-c1_g)/height;
  dy1_b =  (c3_b-c1_b)/height;
  dy2_r =  (c4_r-c2_r)/height;
  dy2_g =  (c4_g-c2_g)/height;
  dy2_b =  (c4_b-c2_b)/height;


  for (y=y1 ; y<y2; y++)
  {
     dx_r =  (c2_r-c1_r)/width;
     dx_g =  (c2_g-c1_g)/width;
     dx_b =  (c2_b-c1_b)/width;
     r = c1_r; g = c1_g; b = c1_b;
     for (x=x1 ; x<x2; x++)
     {
        putpixel(bmp,x,y,makecol(r,g,b));
        r += dx_r;
        g += dx_g;
        b += dx_b;
     }
     c1_r += dy1_r; c1_g += dy1_g; c1_b += dy1_b;
     c2_r += dy2_r; c2_g += dy2_g; c2_b += dy2_b;
  
  }

}

</highlightSyntax>




Simple version with Diffusion

<highlightSyntax language="cpp"> int rectfill_gradient_diffuse(BITMAP * bmp, int x1, int y1, int x2, int y2,

              int color1,int color2,int color3,int color4 )

{ float c1_r = getr(color1), c1_g = getg(color1), c1_b = getb(color1); float c2_r = getr(color2), c2_g = getg(color2), c2_b = getb(color2); float c3_r = getr(color3), c3_g = getg(color3), c3_b = getb(color3); float c4_r = getr(color4), c4_g = getg(color4), c4_b = getb(color4);

float dx_r,dx_g,dx_b; float dy1_r,dy1_g,dy1_b; float dy2_r,dy2_g,dy2_b;

float height = y2- y1, width = x2-x1;

int x,y;

float r,g,b;

float ey_r[1280], ey_g[1280], ey_b[1280]; /*float * ey_r, * ey_g, * ey_b;

  ey_r = malloc(width * sizeof(float)); for (x=0;x<width;x++) ey_r[x]=0.0; 
  ey_g = malloc(width * sizeof(float)); for (x=0;x<width;x++) ey_g[x]=0.0;
  ey_b = malloc(width * sizeof(float)); for (x=0;x<width;x++) ey_b[x]=0.0;
  • /
  for (x=0;x<1280;x++) { ey_r[x]=0.0; ey_g[x]=0.0; ey_b[x]=0.0;};
  
  dy1_r =  (c3_r-c1_r)/height;
  dy1_g =  (c3_g-c1_g)/height;
  dy1_b =  (c3_b-c1_b)/height;
  dy2_r =  (c4_r-c2_r)/height;
  dy2_g =  (c4_g-c2_g)/height;
  dy2_b =  (c4_b-c2_b)/height;


  for (y=y1 ; y<y2; y++)
  {
     float ex_r=0.0,ex_g=0.0,ex_b=0.0;
     
     dx_r =  (c2_r-c1_r)/width;
     dx_g =  (c2_g-c1_g)/width;
     dx_b =  (c2_b-c1_b)/width;
     r = c1_r; g = c1_g; b = c1_b;
     for (x=x1 ; x<x2; x++)
     {
        int best;
        float best_r, best_g, best_b, want_r, want_g, want_b;
        float damp = 2.0;
        want_r = r + ex_r/damp + ey_r[x]/damp; 
        want_g = g + ex_g/damp + ey_g[x]/damp; 
        want_b = b + ex_b/damp + ey_b[x]/damp; 
        
        best = makecol(MID(0.0,want_r,255.5),
                      MID(0.0,want_g,255.5),
                    MID(0.0,want_b,255.5));
        
        best_r = getr(best);
        best_g = getg(best);
        best_b = getb(best);
        
        putpixel(bmp,x,y,best);
        
        ex_r = ey_r[x] = want_r - best_r; 
        ex_g = ey_g[x] = want_g - best_g; 
        ex_b = ey_b[x] = want_b - best_b; 
        
        r += dx_r;
        g += dx_g;
        b += dx_b;
     }
     c1_r += dy1_r; c1_g += dy1_g; c1_b += dy1_b;
     c2_r += dy2_r; c2_g += dy2_g; c2_b += dy2_b;
  
  }

/*

  free(ey_r); 
  free(ey_g); 
  free(ey_b); 
  • /

}


</highlightSyntax>

gradientfill.h

<highlightSyntax language="cpp"> int rectfill_gradient(BITMAP * bmp, int x1, int y1, int x2, int y2,

              int color1,int color2,int color3,int color4 )
  /* draws a filled rectangle x1,y1,x2,y2 with a graduated tint.
              color 1 = top left, color 2 = top right, color 
              3 = bottom left, color 4 = bottom right */ 

int rectfill_gradient_diffuse(BITMAP * bmp, int x1, int y1, int x2, int y2,

              int color1,int color2,int color3,int color4 ); 
  /* the same as above, but with error diffusion shading. This function is slower
  but gives higher quality results in lower color depths */ 
</highlightSyntax>


Making it faster

The functions shown above work well enough but do not perform very well. You can use them for prerendering but are too slow to use every frame.

Stage 1 - Separating the hline functions

This first change doesn't actually make anything faster, but is the first step in making an Allegro style drawing function. hlines are the primitive used to draw many things in the Allegro 2d API , including Circles, Triangles and Polys.

<highlightSyntax language="cpp"> hline_gradient(BITMAP * bmp, int x1, int y, int x2,

              float r1, float g1, float b1,
              float r2, float g2, float b2)

{

int x;
  
float width = x2-x1;
float dx_r =  (r2-r1)/width;
float dx_g =  (g2-g1)/width;
float dx_b =  (b2-b1)/width;
      
     for (x=x1 ; x<=x2; x++)
     {
        putpixel(bmp,x,y,makecol(r1,g1,b1));
        r1 += dx_r;
        g1 += dx_g;
        b1 += dx_b;
     }

}


hline_gradient_diffuse(BITMAP * bmp, int x1, int y, int x2,

              float r1, float g1, float b1,
              float r2, float g2, float b2,
              float * ey_r, float * ey_g, float * ey_b)

{

float width = x2-x1;
float dx_r =  (r2-r1)/width;
float dx_g =  (g2-g1)/width;
float dx_b =  (b2-b1)/width;
float ex_r=0.0,ex_g=0.0,ex_b=0.0;

int x;
     for (x=x1 ; x<=x2; x++)
     {
        int best;
        float best_r, best_g, best_b, want_r, want_g, want_b;
        float damp = 2.0;
        want_r = r1 + ex_r/damp + ey_r[x]/damp; 
        want_g = g1 + ex_g/damp + ey_g[x]/damp; 
        want_b = b1 + ex_b/damp + ey_b[x]/damp; 
        
        best = makecol(MID(0.0,want_r,255.5),
                      MID(0.0,want_g,255.5),
                    MID(0.0,want_b,255.5));
        
        best_r = getr(best);
        best_g = getg(best);
        best_b = getb(best);
        
        putpixel(bmp,x,y,best);
        
        ex_r = ey_r[x] = want_r - best_r; 
        ex_g = ey_g[x] = want_g - best_g; 
        ex_b = ey_b[x] = want_b - best_b; 
        
        r1 += dx_r;
        g1 += dx_g;
        b1 += dx_b;
     }

}


int rectfill_gradient(BITMAP * bmp, int x1, int y1, int x2, int y2,

              int color1,int color2,int color3,int color4 )

{

float c1_r = getr(color1), c1_g = getg(color1), c1_b = getb(color1); float c2_r = getr(color2), c2_g = getg(color2), c2_b = getb(color2); float c3_r = getr(color3), c3_g = getg(color3), c3_b = getb(color3); float c4_r = getr(color4), c4_g = getg(color4), c4_b = getb(color4);


float dx_r,dx_g,dx_b; float dy1_r,dy1_g,dy1_b; float dy2_r,dy2_g,dy2_b;

float height = y2- y1, width = x2-x1;

int x,y;

//float r,g,b;


  dy1_r =  (c3_r-c1_r)/height;
  dy1_g =  (c3_g-c1_g)/height;
  dy1_b =  (c3_b-c1_b)/height;
  dy2_r =  (c4_r-c2_r)/height;
  dy2_g =  (c4_g-c2_g)/height;
  dy2_b =  (c4_b-c2_b)/height;


  for (y=y1 ; y<y2; y++)
  {
     hline_gradient(bmp, x1, y, x2,
                   c1_r, c1_g, c1_b,
                 c2_r, c2_g, c2_b);
     c1_r += dy1_r; c1_g += dy1_g; c1_b += dy1_b;
     c2_r += dy2_r; c2_g += dy2_g; c2_b += dy2_b;
  
  }

}


int rectfill_gradient_diffuse(BITMAP * bmp, int x1, int y1, int x2, int y2,

              int color1,int color2,int color3,int color4 )

{

float c1_r = getr(color1), c1_g = getg(color1), c1_b = getb(color1); float c2_r = getr(color2), c2_g = getg(color2), c2_b = getb(color2); float c3_r = getr(color3), c3_g = getg(color3), c3_b = getb(color3); float c4_r = getr(color4), c4_g = getg(color4), c4_b = getb(color4);

float dx_r,dx_g,dx_b; float dy1_r,dy1_g,dy1_b; float dy2_r,dy2_g,dy2_b;

float height = y2- y1, width = x2-x1;

int x,y;

float r,g,b;

float ey_r[1280], ey_g[1280], ey_b[1280];

  for (x=0;x<1280;x++) { ey_r[x]=0.0; ey_g[x]=0.0; ey_b[x]=0.0;};
  
  dy1_r =  (c3_r-c1_r)/height;
  dy1_g =  (c3_g-c1_g)/height;
  dy1_b =  (c3_b-c1_b)/height;
  dy2_r =  (c4_r-c2_r)/height;
  dy2_g =  (c4_g-c2_g)/height;
  dy2_b =  (c4_b-c2_b)/height;


  for (y=y1 ; y<y2; y++)
  {
     hline_gradient_diffuse(bmp, x1, y , x2,
                       c1_r, c1_g, c1_b,
                       c2_r, c2_g, c2_b,
                       ey_r,ey_g,ey_b);
     
     c1_r += dy1_r; c1_g += dy1_g; c1_b += dy1_b;
     c2_r += dy2_r; c2_g += dy2_g; c2_b += dy2_b;
  
  }

}


</highlightSyntax>


This exposes two new user-style functions, hline_gradient() and hline_gradient_diffuse . The rectfill functions now call the hline functions, so the hline functions are now the candidates for MicroOptimisation.


Stage 2 - Separating the Color Depths

For every pixel, make_col() is called once (to find the closest usable colour) and getr(), getb(), getg() are called once (by the diffusing version) to break this colour down again. These generic user functions work in all color depths on all platforms, but come with the speed penalty. Great speed savings can be made by making a version for each color depth that Allegro supports.

In the following version of hline_gradient, the depth of the bitmap is tested once and the appropriate internal function called, this is only shown as a quick example as I want to get onto the next Stage

<highlightSyntax language="cpp">_hline_gradient_8(BITMAP * bmp, int x1, int y, int x2,

              float r1, float g1, float b1,
              float r2, float g2, float b2)

{

int x;
  
float width = x2-x1;
float dx_r =  (r2-r1)/width;
float dx_g =  (g2-g1)/width;
float dx_b =  (b2-b1)/width;
      
     for (x=x1 ; x<=x2; x++)
     {
        putpixel8(bmp,x,y,makecol8(r1,g1,b1));   // uses depth-specific functions
        r1 += dx_r;
        g1 += dx_g;
        b1 += dx_b;
     }

}

hline_gradient(BITMAP * bmp, int x1, int y, int x2,

              float r1, float g1, float b1,
              float r2, float g2, float b2)

{

int x;
  
float width = x2-x1;
float dx_r =  (r2-r1)/width;
float dx_g =  (g2-g1)/width;
float dx_b =  (b2-b1)/width;
      
  switch (bitmap_color_depth(bmp))
  {
     case 8:
        for (y=1; y<=y2; y++)
        {
           _hline_gradient_8(bmp,x1,y,x2,r1,g1,b1,r2,g2,b2);
           c1_r += dy1_r; c1_g += dy1_g; c1_b += dy1_b;
           c2_r += dy2_r; c2_g += dy2_g; c2_b += dy2_b;
        }
        
        break;
     
     case 15:
     case 16:
                   ...      
  }      

}


</highlightSyntax>

Stage 3 - Passing Line Pointers

The hlines don't need to know about the whole bitmap, just the line they are working on. With nearly all allegro bitmaps this is a contiguous block of memory with pixels packed in left to right.

<highlightSyntax language="cpp">_hline_gradient_linear_8(void * line, int x1, int x2,

           float r1, float g1, float b1,
           float r2, float g2, float b2)

{

int x;
  
float width = x2-x1;
float dx_r =  (r2-r1)/width;
float dx_g =  (g2-g1)/width;
float dx_b =  (b2-b1)/width;
      
     for (x=x1 ; x<=x2; x++)
     {
        * (char *) line[x] = makecol8(r1,g1,b1); 
        r1 += dx_r;
        g1 += dx_g;
        b1 += dx_b;
     }

}

</highlightSyntax>

The line is passed as a void * rather than a char * in order to keep the same parameter types for all depths. This makes it possible to use functions pointers to select between them. FunctionPointers are often grouped together as Vtables.



Fixed Point for R,G & B deltas

Using FixedPoint values for the colours seems like an obvious improvement, because this would avoid the expensize float->int conversions. However, because the deltas are calculated and added repeatedly, errors multiply rapidly, and it turns out that 32 bits (8 bit value + 24 bit mantissa) are barely adequate.

As a compromise, it is feasible to pass Allegro's supported 16:16 format to the hline funcs, and use floats for the y deltas

Here is a 16-bit version, which takes the color params as FIXED type.

<highlightSyntax language="cpp">_hline_gradient_linear_16(void * line, int x1, int x2,

              int r1, int g1, int b1,
              int r2, int g2, int b2)

{

int x;
  
int dw =  65536.0 / (0.5+x2-x1);  // fixed reciprocal of width  
int dx_r =  fixmul(r2-r1 , dw);         //because multiplying is quicker than dividing 
int dx_g =  fixmul(g2-g1 , dw);
int dx_b =  fixmul(b2-b1 , dw);
      
     for (x=x1 ; x<=x2; x++)
     {
        * ((short *)line)[x] = makecol16(r1>>16,g1>>16,b1>>16);
        r1 += dx_r;
        g1 += dx_g;
        b1 += dx_b;
     }

}

</highlightSyntax>

Now when you write memory on a PC (and most systems) it is faster if you write chunks of 32 or 64 bits at a time.

Most compilers aren't smart enough to unroll the x loop above so it writes aligned words, which is a shame, because we have to manually account for the different endian conventions.

This following version takes advantage of allegro's endian defines.

<highlightSyntax language="cpp"> _hline_gradient_linear_16(void * line, int x1, int x2,

              int r1, int g1, int b1,
              int r2, int g2, int b2)

{

int x;
  
int dw =  65536.0 / (0.5+x2-x1);  // fixed reciprocal of width  
int dx_r =  fixmul(r2-r1 , dw);         //because multiplying is quicker than dividing 
int dx_g =  fixmul(g2-g1 , dw);
int dx_b =  fixmul(b2-b1 , dw);
      

 x=x1;
  if (x & 1)  //starts on an odd one
  {
        ((short *)line)[x++] = makecol16(r1>>16,g1>>16,b1>>16);
        r1 += dx_r; g1 += dx_g; b1 += dx_b;
  }


  if (x2 & 1)  // ends on an odd one
  {      
     if (x>=x2) return; 
     x2 &= ~1; // remove oddness (for quicker comparison with x) 
     
     while (x < x2)
     {
  1. if ALLEGRO_BIG_ENDIAN
        int w1 = makecol16(r1>>16,g1>>16,b1>>16);
  1. else
        int w1 = makecol16(r1>>16,g1>>16,b1>>16) << 16;
  1. endif
        r1 += dx_r; g1 += dx_g; b1 += dx_b;
  1. if ALLEGRO_BIG_ENDIAN
        w1 += makecol16(r1>>16,g1>>16,b1>>16) << 16;
  1. else
        w1 += makecol16(r1>>16,g1>>16,b1>>16);
  1. endif
        r1 += dx_r; g1 += dx_g; b1 += dx_b;
        * (int*)(&((short *)line)[x+=2]) = w1;  // writes int to array of shorts (isn't casting cool?)    
     }   
        ((short *)line)[x++] = makecol16(r1>>16,g1>>16,b1>>16);
        //r1 += dx_r; g1 += dx_g; b1 += dx_b; no need, not used again
     
  } else {   
     
     while (x < x2)
     {
  1. if ALLEGRO_BIG_ENDIAN
        int w1 = makecol16(r1>>16,g1>>16,b1>>16);
  1. else
        int w1 = makecol16(r1>>16,g1>>16,b1>>16) << 16;
  1. endif
        r1 += dx_r; g1 += dx_g; b1 += dx_b;
  1. if ALLEGRO_BIG_ENDIAN
        w1 += makecol16(r1>>16,g1>>16,b1>>16) << 16;
  1. else
        w1 += makecol16(r1>>16,g1>>16,b1>>16);
  1. endif
        r1 += dx_r; g1 += dx_g; b1 += dx_b;
        * (int*)(&((short *)line)[x+=2]) = w1;    
     }   
  }


}

</highlightSyntax>

The only endian consideration is the <<16 needs to be on the third makecol() instead of the second, so this is done with #ifdefs because you only need the version for your system in your lib.

This is about as far as we can go with C, so we'll keep this version for platforms without an assembler.

Now it is getting time to look at what the compiler is doing to optimise this. It is very unlikely to have spotted that we are working with shorts and could use some MMX code here

TODO: {{{1}}} (edit)


compile with -O3 and -mmmx to see if gcc is any good yet.