/*
This file is part of darktable,
copyright (c) 2016 johannes hanika.
darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
darktable is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with darktable. If not, see .
*/
#include "common.h"
kernel void
pad_input(
read_only image2d_t input,
write_only image2d_t padded,
const int wd, // dimensions of input
const int ht,
const int max_supp, // size of border
const int wd2, // padded dimensions
const int ht2)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
int cx = x - max_supp, cy = y - max_supp;
if(x >= wd2 || y >= ht2) return;
// fill boundary with max_supp px:
if(cx >= wd) cx = wd-1;
if(cy >= ht) cy = ht-1;
if(cx < 0) cx = 0;
if(cy < 0) cy = 0;
float4 pixel = read_imagef(input, sampleri, (int2)(cx, cy));
write_imagef (padded, (int2)(x, y), pixel.x*0.01f);
}
float expand_gaussian(
read_only image2d_t coarse,
const int i,
const int j,
const int wd,
const int ht)
{
float c = 0.0f;
const float w[5] = {1.0f/16.0f, 4.0f/16.0f, 6.0f/16.0f, 4.0f/16.0f, 1.0f/16.0f};
const int cx = i/2;
const int cy = j/2;
switch((i&1) + 2*(j&1))
{
case 0: // both are even, 3x3 stencil
for(int ii=-1;ii<=1;ii++) for(int jj=-1;jj<=1;jj++)
{
float4 pixel = read_imagef(coarse, sampleri, (int2)(cx+ii, cy+jj));
c += pixel.x*w[2*jj+2]*w[2*ii+2];
}
break;
case 1: // i is odd, 2x3 stencil
for(int ii=0;ii<=1;ii++) for(int jj=-1;jj<=1;jj++)
{
float4 pixel = read_imagef(coarse, sampleri, (int2)(cx+ii, cy+jj));
c += pixel.x*w[2*jj+2]*w[2*ii+1];
}
break;
case 2: // j is odd, 3x2 stencil
for(int ii=-1;ii<=1;ii++) for(int jj=0;jj<=1;jj++)
{
float4 pixel = read_imagef(coarse, sampleri, (int2)(cx+ii, cy+jj));
c += pixel.x*w[2*jj+1]*w[2*ii+2];
}
break;
default: // case 3: // both are odd, 2x2 stencil
for(int ii=0;ii<=1;ii++) for(int jj=0;jj<=1;jj++)
{
float4 pixel = read_imagef(coarse, sampleri, (int2)(cx+ii, cy+jj));
c += pixel.x*w[2*jj+1]*w[2*ii+1];
}
break;
}
return 4.0f * c;
}
kernel void
gauss_expand(
read_only image2d_t coarse, // coarse input
write_only image2d_t fine, // upsampled blurry output
const int wd, // resolution of fine, also run kernel on fine res
const int ht)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
int cx = x, cy = y;
float4 pixel;
if(x >= wd || y >= ht) return;
// fill boundary with 1 or 2 px:
if(wd & 1) { if(x > wd-2) cx = wd-2; }
else { if(x > wd-3) cx = wd-3; }
if(ht & 1) { if(y > ht-2) cy = ht-2; }
else { if(y > ht-3) cy = ht-3; }
if(cx <= 0) cx = 1;
if(cy <= 0) cy = 1;
pixel.x = expand_gaussian(coarse, cx, cy, wd, ht);
write_imagef (fine, (int2)(x, y), pixel);
}
kernel void
gauss_reduce(
read_only image2d_t input, // fine input buffer
write_only image2d_t coarse, // coarse scale, blurred input buf
const int wd, // coarse res (also run this kernel on coarse res only)
const int ht)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
int cx = x, cy = y;
float4 pixel;
if(x >= wd || y >= ht) return;
// fill boundary with 1 px:
if(x >= wd-1) cx = wd-2;
if(y >= ht-1) cy = ht-2;
if(cx <= 0) cx = 1;
if(cy <= 0) cy = 1;
// blur, store only coarse res
pixel.x = 0.0f;
const float w[5] = {1.0f/16.0f, 4.0f/16.0f, 6.0f/16.0f, 4.0f/16.0f, 1.0f/16.0f};
// direct 5x5 stencil only on required pixels:
for(int jj=-2;jj<=2;jj++) for(int ii=-2;ii<=2;ii++)
pixel.x += read_imagef(input, sampleri, (int2)(2*cx+ii, 2*cy+jj)).x * w[ii+2] * w[jj+2];
write_imagef (coarse, (int2)(x, y), pixel);
}
float laplacian(
read_only image2d_t coarse, // coarse res gaussian
read_only image2d_t fine, // fine res gaussian
const int i, // fine index
const int j,
const int ci, // clamped fine index
const int cj,
const int wd, // fine width
const int ht) // fine height
{
const float c = expand_gaussian(coarse, ci, cj, wd, ht);
return read_imagef(fine, sampleri, (int2)(i, j)).x - c;
}
kernel void
laplacian_assemble(
read_only image2d_t input, // original input buffer, gauss at current fine pyramid level
read_only image2d_t output1, // state of reconstruction, coarse output buffer
write_only image2d_t output0, // reconstruction, one level finer, run kernel on this dimension
read_only image2d_t buf_g0_l0, // image2d_array_t only supported in ocl 2.0 :(
read_only image2d_t buf_g0_l1,
read_only image2d_t buf_g1_l0,
read_only image2d_t buf_g1_l1,
read_only image2d_t buf_g2_l0,
read_only image2d_t buf_g2_l1,
read_only image2d_t buf_g3_l0,
read_only image2d_t buf_g3_l1,
read_only image2d_t buf_g4_l0,
read_only image2d_t buf_g4_l1,
read_only image2d_t buf_g5_l0,
read_only image2d_t buf_g5_l1,
// read_only image2d_t buf_g6_l0,
// read_only image2d_t buf_g6_l1,
// read_only image2d_t buf_g7_l0,
// read_only image2d_t buf_g7_l1,
const int pw, // width and height of the fine buffers (l0)
const int ph)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
int i = x, j = y;
if(x >= pw || y >= ph) return;
// fill boundary with 1 or 2 px:
if(pw & 1) { if(x > pw-2) i = pw-2; }
else { if(x > pw-3) i = pw-3; }
if(ph & 1) { if(y > ph-2) j = ph-2; }
else { if(y > ph-3) j = ph-3; }
if(x <= 0) i = 1;
if(y <= 0) j = 1;
float4 pixel;
pixel.x = expand_gaussian(output1, i, j, pw, ph);
const int num_gamma = 6; // this sucks, have to hardcode for the lack of arrays
const float v = read_imagef(input, sampleri, (int2)(x, y)).x;
int hi = 1;
// what we mean is this:
// for(;hi= 4);
pixel.x += r;
write_imagef (output0, (int2)(x, y), pixel);
}
float curve(
const float x,
const float g,
const float sigma,
const float shadows,
const float highlights,
const float clarity)
{
const float c = x-g;
float val;
const float ssigma = c > 0.0f ? sigma : - sigma;
const float shadhi = c > 0.0f ? shadows : highlights;
if (fabs(c) > 2*sigma) val = g + ssigma + shadhi * (c-ssigma); // linear part
else
{ // blend in via quadratic bezier
const float t = clamp(c / (2.0f*ssigma), 0.0f, 1.0f);
const float t2 = t * t;
const float mt = 1.0f-t;
val = g + ssigma * 2.0f*mt*t + t2*(ssigma + ssigma*shadhi);
}
// midtone local contrast
val += clarity * c * native_exp(-c*c/(2.0f*sigma*sigma/3.0f));
return val;
}
kernel void
process_curve(
read_only image2d_t input,
write_only image2d_t output,
const float g,
const float sigma,
const float shadows,
const float highlights,
const float clarity,
const int wd,
const int ht)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if(x >= wd || y >= ht) return;
float4 pixel = read_imagef(input, sampleri, (int2)(x, y));
pixel.x = curve(pixel.x, g, sigma, shadows, highlights, clarity);
write_imagef (output, (int2)(x, y), pixel);
}
kernel void
write_back(
read_only image2d_t input,
read_only image2d_t processed,
write_only image2d_t output,
const int max_supp,
const int wd,
const int ht)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if(x >= wd || y >= ht) return;
float4 pixel = read_imagef(input, sampleri, (int2)(x, y));
pixel.x = 100.0f*read_imagef(processed, sampleri, (int2)(x+max_supp, y+max_supp)).x;
write_imagef (output, (int2)(x, y), pixel);
}