19#ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20#define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
22#if defined(__cplusplus) || defined(c_plusplus)
26#if defined(MAGICKCORE_OPENCL_SUPPORT)
/*
  Helper macros for building OpenCL program text as a C string.  Each one
  stringifies its arguments and wraps the result in newline escapes so the
  emitted preprocessor directive sits on its own line of the kernel source.
  The hash sign and the directive keyword are written as two adjacent string
  literals that the compiler concatenates.

    OPENCL_DEFINE(VAR,...)  emits a define of VAR with the given body
    OPENCL_IF / OPENCL_ELIF emit an if / elif with the given condition
    OPENCL_ELSE / OPENCL_ENDIF emit else / endif
    STRINGIFY(...)          emits its arguments verbatim plus a newline
*/
31#define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32#define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33#define OPENCL_ELSE() "\n #""else " " \n"
34#define OPENCL_ENDIF() "\n #""endif " " \n"
35#define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36#define STRINGIFY(...) #__VA_ARGS__ "\n"
/*
  Start of the OpenCL program string.  The entries below emit macro
  definitions into the kernel source:
    - GetPixelAlpha(pixel): QuantumRange minus the w component.
    - Sigma and Tau constants: noise scale factors, each a fixed multiple of
      a variable named attenuate that must be in scope where they are used.
    - MagickMax / MagickMin: ternary max and min; being macros, each
      argument may be evaluated more than once.
*/
38const char* accelerateKernels =
43 OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
44 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
45 OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
46 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
47 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
48 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
49 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
50 OPENCL_DEFINE(SigmaRandom, (attenuate))
51 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
52 OPENCL_DEFINE(MagickMax(x, y), (((x) > (y)) ? (x) : (y)))
53 OPENCL_DEFINE(MagickMin(x, y), (((x) < (y)) ? (x) : (y)))
64 TransparentColorspace,
79 Rec601YCbCrColorspace,
81 Rec709YCbCrColorspace,
/*
  Composite operator enumerators.  The opening and closing lines of the
  enclosing enum and some enumerators are not visible in this excerpt.
  NOTE(review): presumably this is the CompositeOperator type that the
  Composite kernel casts its compose argument to -- confirm.
*/
102 UndefinedCompositeOp,
104 ModulusAddCompositeOp,
108 ChangeMaskCompositeOp,
110 ColorBurnCompositeOp,
111 ColorDodgeCompositeOp,
113 CopyBlackCompositeOp,
117 CopyGreenCompositeOp,
118 CopyMagentaCompositeOp,
119 CopyOpacityCompositeOp,
121 CopyYellowCompositeOp,
128 DifferenceCompositeOp,
131 ExclusionCompositeOp,
132 HardLightCompositeOp,
136 LinearLightCompositeOp,
148 SoftLightCompositeOp,
154 ModulusSubtractCompositeOp,
155 ThresholdCompositeOp,
161 DivideDstCompositeOp,
164 PegtopLightCompositeOp,
165 VividLightCompositeOp,
167 LinearDodgeCompositeOp,
168 LinearBurnCompositeOp,
169 MathematicsCompositeOp,
170 DivideSrcCompositeOp,
172 DarkenIntensityCompositeOp,
173 LightenIntensityCompositeOp
194 MultiplicativeGaussianNoise,
/*
  PixelIntensityMethod: selector consumed by GetPixelIntensity().  The first
  enumerator is pinned to 0 and the remaining ones take consecutive values.
*/
205 UndefinedPixelIntensityMethod = 0,
206 AveragePixelIntensityMethod,
207 BrightnessPixelIntensityMethod,
208 LightnessPixelIntensityMethod,
209 Rec601LumaPixelIntensityMethod,
210 Rec601LuminancePixelIntensityMethod,
211 Rec709LumaPixelIntensityMethod,
212 Rec709LuminancePixelIntensityMethod,
213 RMSPixelIntensityMethod,
214 MSPixelIntensityMethod
215 } PixelIntensityMethod;
/*
  ResizeWeightingFunctionType: identifiers for resize weighting functions.
  BoxWeightingFunction is pinned to 0; the rest take consecutive values.
  The last enumerator is followed by a trailing comma.
*/
220 BoxWeightingFunction = 0,
221 TriangleWeightingFunction,
222 CubicBCWeightingFunction,
223 HanningWeightingFunction,
224 HammingWeightingFunction,
225 BlackmanWeightingFunction,
226 GaussianWeightingFunction,
227 QuadraticWeightingFunction,
228 JincWeightingFunction,
229 SincWeightingFunction,
230 SincFastWeightingFunction,
231 KaiserWeightingFunction,
232 WelshWeightingFunction,
233 BohmanWeightingFunction,
234 LagrangeWeightingFunction,
235 CosineWeightingFunction,
236 } ResizeWeightingFunctionType;
/*
  Channel bit masks.  Several names alias the same bit:
    0x0001 Gray / Cyan, 0x0002 Green / Magenta, 0x0004 Blue / Yellow,
    0x0008 Alpha / Opacity / Matte, 0x0020 Black / Index,
    0x0080 RGBChannels / GrayChannels.
  DefaultChannels is AllChannels plus SyncChannels with the OpacityChannel
  bit cleared.  The opening and closing lines of the enum are not visible
  in this excerpt.
  NOTE(review): presumably this is the ChannelType used in the kernel
  signatures -- confirm.
*/
244 GrayChannel = 0x0001,
245 CyanChannel = 0x0001,
246 GreenChannel = 0x0002,
247 MagentaChannel = 0x0002,
248 BlueChannel = 0x0004,
249 YellowChannel = 0x0004,
250 AlphaChannel = 0x0008,
251 OpacityChannel = 0x0008,
252 MatteChannel = 0x0008,
253 BlackChannel = 0x0020,
254 IndexChannel = 0x0020,
255 CompositeChannels = 0x002F,
256 AllChannels = 0x7ffffff,
260 TrueAlphaChannel = 0x0040,
261 RGBChannels = 0x0080,
262 GrayChannels = 0x0080,
263 SyncChannels = 0x0100,
264 DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
/*
  ScaleCharToQuantum(): widen an 8-bit value to the quantum type.  One
  variant is emitted per MAGICKCORE_QUANTUM_DEPTH:
    depth 8  -> value unchanged
    depth 16 -> value * 257
    depth 32 -> value * 16843009
  NOTE(review): the 32-bit variant multiplies by a double literal
  (16843009.0) while the 16-bit variant uses a float literal (257.0f);
  confirm double precision is intended in the kernel source.
*/
272OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
275 static inline CLQuantum ScaleCharToQuantum(const
unsigned char value)
277 return((CLQuantum) value);
281OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
284 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
286 return((CLQuantum) (257.0f*value));
290OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
293 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
295 return((CLQuantum) (16843009.0*value));
/*
  ClampToQuantum(): convert a float to CLQuantum.  Two variants are visible:
    - when MAGICKCORE_HDRI_SUPPORT is 1: a plain cast, no clamping;
    - the other variant: clamp to [0, QuantumRange], add 0.5f, then cast.
  The directive separating the two variants is not visible in this excerpt.
*/
301OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
304 static inline CLQuantum ClampToQuantum(const
float value)
306 return (CLQuantum) value;
313 static inline CLQuantum ClampToQuantum(
const float value)
315 return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
/*
  ClampToCanvas(): restrict a coordinate to the inclusive range
  [0, range-1].
    offset  coordinate to clamp (may be negative or past the end)
    range   canvas extent along that axis
*/
322 static inline
int ClampToCanvas(const
int offset, const
int range)
324 return clamp(offset, (
int)0, range - 1);
/*
  ClampToCanvasWithHalo(): clamp a coordinate to a range extended by edge
  on one side, chosen by section:
    section != 0 -> [-edge, range-1]
    section == 0 -> [0, range-1+edge]
*/
329 static inline int ClampToCanvasWithHalo(
const int offset,
const int range,
const int edge,
const int section)
331 return clamp(offset, section ? (
int)(0 - edge) : (
int)0, section ? (range - 1) : (range - 1 + edge));
/*
  ScaleQuantumToMap(): turn a quantum value into a lookup-table index.
  Values at or above MaxMap saturate to MaxMap; otherwise the value is
  converted to uint directly.
*/
336 static inline uint ScaleQuantumToMap(CLQuantum value)
338 if (value >= (CLQuantum)MaxMap)
339 return ((uint)MaxMap);
341 return ((uint)value);
/*
  PerceptibleReciprocal(): 1/x guarded against tiny denominators.  When
  the magnitude of x is at least MagickEpsilon the true reciprocal is
  returned; otherwise the result is 1/MagickEpsilon carrying the sign of x
  (x == 0 is treated as positive).
*/
346 static inline float PerceptibleReciprocal(
const float x)
348 float sign = x < (float) 0.0 ? (
float)-1.0 : (float) 1.0;
349 return((sign*x) >= MagickEpsilon ? (
float) 1.0 / x : sign*((
float) 1.0 / MagickEpsilon));
/*
  RoundToUnity(): clamp value to the closed interval [0, 1].
*/
354 static inline float RoundToUnity(
const float value)
356 return clamp(value, 0.0f, 1.0f);
/*
  Blue channel accessors.  Blue is stored in the x component, both for
  CLPixelType pixels and for float4 working values.
*/
362 static inline CLQuantum getBlue(CLPixelType p) {
return p.x; }
363 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
364 static inline float getBlueF4(float4 p) {
return p.x; }
365 static inline void setBlueF4(float4* p,
float value) { (*p).x = value; }
/*
  Green channel accessors.  Green is stored in the y component, both for
  CLPixelType pixels and for float4 working values.
*/
367 static inline CLQuantum getGreen(CLPixelType p) {
return p.y; }
368 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
369 static inline float getGreenF4(float4 p) {
return p.y; }
370 static inline void setGreenF4(float4* p,
float value) { (*p).y = value; }
/*
  Red channel accessors.  Red is stored in the z component, both for
  CLPixelType pixels and for float4 working values.
*/
372 static inline CLQuantum getRed(CLPixelType p) {
return p.z; }
373 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
374 static inline float getRedF4(float4 p) {
return p.z; }
375 static inline void setRedF4(float4* p,
float value) { (*p).z = value; }
/*
  Opacity channel accessors.  Opacity is stored in the w component, both
  for CLPixelType pixels and for float4 working values.
*/
377 static inline CLQuantum getOpacity(CLPixelType p) {
return p.w; }
378 static inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
379 static inline float getOpacityF4(float4 p) {
return p.w; }
380 static inline void setOpacityF4(float4* p,
float value) { (*p).w = value; }
/*
  setGray(): write the same value into the z, y and x components (red,
  green and blue); the w component is left untouched.
*/
382 static inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
/*
  GetPixelIntensity(): reduce pixel p to a single float according to
  method.  Cases visible in this excerpt:
    Average     (red + green + blue) / 3
    Brightness  max of the three channels
    Lightness   (min + max) / 2
    MS, RMS     built from the sum of squared channels; the divisor is on
                lines not visible here
    Rec601      0.298839 R + 0.586811 G + 0.114350 B
    Rec709      0.212656 R + 0.715158 G + 0.072186 B
  There is also a test for GRAYColorspace whose body is not visible.
  NOTE(review): the Luma and Luminance cases of each Rec standard show the
  same weights here; any colorspace-dependent handling between the case
  label and the assignment is not visible -- confirm.
  NOTE(review): several expressions use double literals (3.0, 2.0, the Rec
  weights) -- confirm this is intended for the target devices.
*/
384 static inline float GetPixelIntensity(
const int method,
const int colorspace, CLPixelType p)
386 float red = getRed(p);
387 float green = getGreen(p);
388 float blue = getBlue(p);
392 if (colorspace == GRAYColorspace)
397 case AveragePixelIntensityMethod:
399 intensity = (red + green + blue) / 3.0;
402 case BrightnessPixelIntensityMethod:
404 intensity = MagickMax(MagickMax(red, green), blue);
407 case LightnessPixelIntensityMethod:
409 intensity = (MagickMin(MagickMin(red, green), blue) +
410 MagickMax(MagickMax(red, green), blue)) / 2.0;
413 case MSPixelIntensityMethod:
415 intensity = (float)(((
float)red*red + green*green + blue*blue) /
419 case Rec601LumaPixelIntensityMethod:
429 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
432 case Rec601LuminancePixelIntensityMethod:
442 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
445 case Rec709LumaPixelIntensityMethod:
456 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
459 case Rec709LuminancePixelIntensityMethod:
469 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
472 case RMSPixelIntensityMethod:
474 intensity = (float)(sqrt((
float)red*red + green*green + blue*blue) /
/*
  MWC_AddMod64(): modular addition helper taking a, b and modulus M.
  Only the correction test is visible: it fires when v is at least M, or
  when v converted to float is less than a converted to float.
  NOTE(review): v is presumably a+b and the second test presumably detects
  64-bit wrap-around; its definition is not visible -- confirm.
*/
507ulong MWC_AddMod64(ulong a, ulong b, ulong M)
511 if( (v>=M) || (convert_float(v) < convert_float(a)) )
/*
  MWC_MulMod64(): modular multiplication built on MWC_AddMod64().  The
  visible statements add b into the accumulator r and double b, both
  modulo M.
  NOTE(review): looks like a shift-and-add loop over the bits of a; the
  loop control is not visible -- confirm.
*/
522ulong MWC_MulMod64(ulong a, ulong b, ulong M)
527 r=MWC_AddMod64(r,b,M);
528 b=MWC_AddMod64(b,b,M);
/*
  MWC_PowMod64(): modular exponentiation built on MWC_MulMod64().  The
  visible statements multiply the accumulator by sqr and square sqr, both
  modulo M.
  NOTE(review): looks like square-and-multiply over the bits of e; the
  loop control is not visible -- confirm.
*/
539ulong MWC_PowMod64(ulong a, ulong e, ulong M)
544 acc=MWC_MulMod64(acc,sqr,M);
545 sqr=MWC_MulMod64(sqr,sqr,M);
/*
  MWC_SkipImpl_Mod64(): advance a generator state by distance steps.
  The state pair is packed as x = curr.x*A + curr.y, multiplied modulo M
  by A raised to distance (mod M), and unpacked again as (x/A, x%A).
*/
551uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
553 ulong m=MWC_PowMod64(A, distance, M);
554 ulong x=curr.x*(ulong)A+curr.y;
555 x=MWC_MulMod64(x, m, M);
556 return (uint2)((uint)(x/A), (uint)(x%A));
/*
  MWC_SeedImpl_Mod64(): derive the starting state for the calling work
  item.  The skip distance is
    streamBase + (get_global_id(0)*vecSize + vecOffset) * streamGap
  and the state is the fixed constant MWC_BASEID multiplied modulo M by A
  raised to that distance, unpacked as (x/A, x%A).
*/
559uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
566 enum{ MWC_BASEID = 4077358422479273989UL };
568 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
569 ulong m=MWC_PowMod64(A, dist, M);
571 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
572 return (uint2)((uint)(x/A), (uint)(x%A));
/*
  Generator state: the pair (x, c) plus two seed fields.  MWC64X_Skip and
  MWC64X_SeedStreams pass seed0 and seed1 as the A and M arguments of the
  Mod64 helpers.
*/
576typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
/*
  MWC64X_Step(): advance the generator by one step.  Visible arithmetic:
  Xn is the low 32 bits of seed0*X + C, carry records whether that
  addition wrapped, and Cn is the high 32 bits of seed0*X plus the carry
  (mad_hi).
  NOTE(review): X and C are presumably copies of s->x and s->c, and the
  write-back of Xn and Cn is not visible -- confirm.
*/
578void MWC64X_Step(mwc64x_state_t *s)
582 uint Xn=s->seed0*X+C;
583 uint carry=(uint)(Xn<C);
584 uint Cn=mad_hi(s->seed0,X,carry);
/*
  MWC64X_Skip(): jump the state forward by distance steps using
  MWC_SkipImpl_Mod64() with A = seed0 and M = seed1.  Storing tmp back
  into the state happens on lines not visible here.
*/
590void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
592 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
/*
  MWC64X_SeedStreams(): compute the per-work-item starting state via
  MWC_SeedImpl_Mod64() with A = seed0, M = seed1, vecSize 1 and vecOffset
  0.  Storing tmp back into the state happens on lines not visible here.
*/
597void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
599 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
/*
  MWC64X_NextUint(): the output word is x XOR c of the current state.
  NOTE(review): presumably the state is then stepped and res returned;
  those lines are not visible -- confirm.
*/
605uint MWC64X_NextUint(mwc64x_state_t *s)
607 uint res=s->x ^ s->c;
/*
  mwcReadPseudoRandomValue(): draw the next 32-bit value from rng and
  scale it to a float by dividing by 0xffffffff.
*/
616 float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
617 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
/*
  mwcGenerateDifferentialNoise(): return pixel perturbed by noise of the
  requested noise_type, scaled through the Sigma and Tau macros (which
  read attenuate).  Random draws come from generator r.  Only
  MultiplicativeGaussianNoise has its case label visible in this excerpt;
  the other formulas below are matched to noise types by the Sigma macro
  they use:
    SigmaUniform   pixel + QuantumRange*Sigma*(alpha - 0.5)
    SigmaGaussian, TauGaussian
                   gamma = sqrt(-2 log alpha); sigma and tau are gamma
                   times cospi and sinpi of 2*beta; result is
                   pixel + sqrt(pixel)*Sigma*sigma + QuantumRange*Tau*tau
    SigmaImpulse   tests alpha against Sigma/2 and 1 - Sigma/2; the upper
                   test yields QuantumRange
    SigmaLaplacian log-based offset, with pixel -/+ QuantumRange used when
                   the random draw is at or below the epsilon threshold
    SigmaMultiplicativeGaussian
                   pixel + pixel*Sigma*sigma*cospi(2*beta)/2
    SigmaPoisson   counts iterations while alpha exceeds
                   exp(-Sigma*QuantumScale*pixel), then scales the count
    SigmaRandom    QuantumRange*Sigma*alpha
  Branch structure between these fragments is not visible in this excerpt.
*/
621 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type,
float attenuate) {
630 alpha=mwcReadPseudoRandomValue(r);
635 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
646 beta=mwcReadPseudoRandomValue(r);
647 gamma=sqrt(-2.0f*log(alpha));
648 sigma=gamma*cospi((2.0f*beta));
649 tau=gamma*sinpi((2.0f*beta));
650 noise=(float)(pixel+sqrt((
float) pixel)*SigmaGaussian*sigma+
651 QuantumRange*TauGaussian*tau);
658 if (alpha < (SigmaImpulse/2.0f))
661 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
662 noise=(float)QuantumRange;
671 if (alpha <= MagickEpsilon)
672 noise=(float) (pixel-QuantumRange);
674 noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
679 if (beta <= (0.5f*MagickEpsilon))
680 noise=(float) (pixel+QuantumRange);
682 noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
685 case MultiplicativeGaussianNoise:
688 if (alpha > MagickEpsilon)
689 sigma=sqrt(-2.0f*log(alpha));
690 beta=mwcReadPseudoRandomValue(r);
691 noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
692 cospi((
float) (2.0f*beta))/2.0f);
700 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
701 for (i=0; alpha > poisson; i++)
703 beta=mwcReadPseudoRandomValue(r);
706 noise=(float) (QuantumRange*i*PerceptibleReciprocal(SigmaPoisson));
711 noise=(float) (QuantumRange*SigmaRandom*alpha);
/*
  AddNoise(): add noise to the channels selected by channel, reading from
  inputImage and writing to filteredImage.

  Random stream layout: span = pixelsPerWorkItem * numRandomNumbersPerPixel
  and offset = span * local size * group id; both are handed to
  MWC64X_SeedStreams() as base offset and per-stream offset.

  Pixel walk: a work item starts at
    local size * group id * pixelsPerWorkItem + local id
  and advances by the local size; positions at or beyond inputPixelCount
  are not read or written.  For each selected channel the current value is
  replaced by the clamped result of mwcGenerateDifferentialNoise().

  The declaration of rng, the use of seed0 / seed1 and the loop driven by
  count are on lines not visible in this excerpt.
*/
720 void AddNoise(
const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
721 ,
const unsigned int inputPixelCount,
const unsigned int pixelsPerWorkItem
722 ,
const ChannelType channel
723 ,
const NoiseType noise_type,
const float attenuate
724 ,
const unsigned int seed0,
const unsigned int seed1
725 ,
const unsigned int numRandomNumbersPerPixel) {
731 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
732 uint offset = span * get_local_size(0) * get_group_id(0);
734 MWC64X_SeedStreams(&rng, offset, span);
736 uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0);
738 uint count = pixelsPerWorkItem;
741 if (pos < inputPixelCount) {
742 CLPixelType p = inputImage[pos];
744 if ((channel&RedChannel)!=0) {
745 setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
748 if ((channel&GreenChannel)!=0) {
749 setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
752 if ((channel&BlueChannel)!=0) {
753 setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
756 if ((channel & OpacityChannel) != 0) {
757 setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
760 filteredImage[pos] = p;
763 pos += get_local_size(0);
/*
  BlurRow(): horizontal pass of a separable blur.

  Stage 1: the work group cooperatively copies loadSize = local size +
  width pixels of row y into the local buffer temp, starting radius
  columns left of the group origin; column indices are clamped to the
  canvas with ClampToCanvas().  A local-memory barrier follows.

  Stage 2: every work item whose global x is inside the image accumulates
  filter[i] * temp[i + local id] over the filter taps.  The main loop
  consumes UFACTOR taps per iteration (UFACTOR defaults to 8 when not
  already defined) and a tail loop handles the remainder.  Each component
  is passed through ClampToQuantum() and the float4 result is stored at
  filtered_im[y*columns + x].

  The declaration of the tap index i is on a line not visible here.
*/
789 __kernel
void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
790 const ChannelType channel, __constant
float *filter,
791 const unsigned int width,
792 const unsigned int imageColumns,
const unsigned int imageRows,
793 __local CLPixelType *temp)
795 const int x = get_global_id(0);
796 const int y = get_global_id(1);
798 const int columns = imageColumns;
800 const unsigned int radius = (width-1)/2;
801 const int wsize = get_local_size(0);
802 const unsigned int loadSize = wsize+width;
830 const int groupX=get_local_size(0)*get_group_id(0);
831 const int groupY=get_local_size(1)*get_group_id(1);
834 for (
int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
837 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
846 barrier(CLK_LOCAL_MEM_FENCE);
849 if (get_global_id(0) < columns)
852 float4 result = (float4) 0;
856 \n #ifndef UFACTOR \n
857 \n #define UFACTOR 8 \n
860 for ( ; i+UFACTOR < width; )
862 \n #pragma unroll UFACTOR\n
863 for (
int j=0; j < UFACTOR; j++, i++)
865 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
869 for ( ; i < width; i++)
871 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
874 result.x = ClampToQuantum(result.x);
875 result.y = ClampToQuantum(result.y);
876 result.z = ClampToQuantum(result.z);
877 result.w = ClampToQuantum(result.w);
880 filtered_im[y*columns+x] = result;
/*
  BlurColumn(): vertical pass of a separable blur, consuming the float4
  output of BlurRow() and producing CLPixelType pixels.

  Stage 1: the work group copies loadSize = local size (dimension 1) +
  width entries of one column into the local buffer temp, starting radius
  rows above the group origin; row indices are clamped with
  ClampToCanvas().  A local-memory barrier follows.

  Stage 2: every work item whose global y is inside the image accumulates
  filter[i] * temp[i + local id] with the same UFACTOR-chunked main loop
  and tail loop as the row pass, clamps each component and stores the
  pixel at filtered_im[y*columns + x].

  NOTE(review): the load reads column groupX while the store writes column
  x; these coincide only when the local size in dimension 0 is 1 --
  confirm against the launch configuration.
*/
893 __kernel
void BlurColumn(
const __global float4 *blurRowData, __global CLPixelType *filtered_im,
894 const ChannelType channel, __constant
float *filter,
895 const unsigned int width,
896 const unsigned int imageColumns,
const unsigned int imageRows,
897 __local float4 *temp)
899 const int x = get_global_id(0);
900 const int y = get_global_id(1);
904 const int columns = imageColumns;
905 const int rows = imageRows;
907 unsigned int radius = (width-1)/2;
908 const int wsize = get_local_size(1);
909 const unsigned int loadSize = wsize+width;
912 const int groupX=get_local_size(0)*get_group_id(0);
913 const int groupY=get_local_size(1)*get_group_id(1);
918 for (
int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
920 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
924 barrier(CLK_LOCAL_MEM_FENCE);
927 if (get_global_id(1) < rows)
930 float4 result = (float4) 0;
934 \n #ifndef UFACTOR \n
935 \n #define UFACTOR 8 \n
938 for ( ; i+UFACTOR < width; )
940 \n #pragma unroll UFACTOR \n
941 for (
int j=0; j < UFACTOR; j++, i++)
943 result+=filter[i]*temp[i+get_local_id(1)];
947 for ( ; i < width; i++)
949 result+=filter[i]*temp[i+get_local_id(1)];
952 result.x = ClampToQuantum(result.x);
953 result.y = ClampToQuantum(result.y);
954 result.z = ClampToQuantum(result.z);
955 result.w = ClampToQuantum(result.w);
958 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
/*
  ColorDodge(): color-dodge blend of one premultiplied channel.
    Sca, Sa  source channel (premultiplied) and source alpha
    Dca, Da  destination channel (premultiplied) and destination alpha
  Visible branches:
    Sca*Da + Dca*Sa >= Sa*Da  ->  Sa*Da + Sca*(1-Da) + Dca*(1-Sa)
    otherwise                 ->  Dca*Sa*Sa/(Sa-Sca) + Sca*(1-Da) + Dca*(1-Sa)
  Lines between the signature and the first test are not visible here.
*/
977 static inline float ColorDodge(
const float Sca,
978 const float Sa,
const float Dca,
const float Da)
983 if ((Sca*Da+Dca*Sa) >= Sa*Da)
984 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
985 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
/*
  CompositeColorDodge(): color-dodge p (source) over q (destination) into
  composite.
    - Sa and Da are the alphas, computed as 1 - QuantumScale*opacity.
    - gamma = RoundToUnity(Sa + Da - Sa*Da) is the resulting alpha; the
      output opacity is QuantumRange*(1 - gamma).
    - gamma is then replaced by QuantumRange/gamma, substituting
      MagickEpsilon when its magnitude is below MagickEpsilon.
    - red, green and blue are gamma times ColorDodge() applied to the
      alpha-premultiplied, QuantumScale-normalized channel values.
*/
1018 static inline void CompositeColorDodge(
const float4 *p,
1019 const float4 *q,float4 *composite) {
1026 Sa=1.0f-QuantumScale*getOpacityF4(*p);
1027 Da=1.0f-QuantumScale*getOpacityF4(*q);
1028 gamma=RoundToUnity(Sa+Da-Sa*Da);
1029 setOpacityF4(composite, QuantumRange*(1.0-gamma));
1030 gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
1031 setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
1032 getRedF4(*q)*Da,Da));
1033 setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
1034 getGreenF4(*q)*Da,Da));
1035 setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
1036 getBlueF4(*q)*Da,Da));
/*
  MagickPixelCompositePlus(): additive composite of p and q.
    alpha, beta  opacity values (quantum scale) for p and q
  Sa and Da are 1 - QuantumScale*opacity.  The output alpha is
  RoundToUnity(Sa + Da), stored as opacity QuantumRange*(1 - gamma).
  Each color channel is (Sa*p + Da*q) multiplied by
  PerceptibleReciprocal(gamma).
*/
1041 static inline void MagickPixelCompositePlus(
const float4 *p,
1042 const float alpha,
const float4 *q,
1043 const float beta,float4 *composite)
1054 Sa=1.0-QuantumScale*alpha;
1055 Da=1.0-QuantumScale*beta;
1056 gamma=RoundToUnity(Sa+Da);
1057 setOpacityF4(composite,(
float) QuantumRange*(1.0-gamma));
1058 gamma=PerceptibleReciprocal(gamma);
1059 setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
1060 setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
1061 setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
/*
  MagickPixelCompositeBlend(): weighted blend implemented on top of
  MagickPixelCompositePlus().  Here alpha and beta are blend weights: each
  pixel is given the opacity
    QuantumRange - weight * (QuantumRange - own opacity)
  before the additive composite is applied.
*/
1066 static inline void MagickPixelCompositeBlend(
const float4 *p,
1067 const float alpha,
const float4 *q,
1068 const float beta,float4 *composite)
1070 MagickPixelCompositePlus(p,(
float) (QuantumRange-alpha*
1071 (QuantumRange-getOpacityF4(*p))),q,(
float) (QuantumRange-beta*
1072 (QuantumRange-getOpacityF4(*q))),composite);
/*
  Composite(): composite one pixel of compositeImage onto image in place;
  the work item handles the pixel at (global id 0, global id 1).

    - There is a test for coordinates at or beyond imageWidth /
      imageHeight; its body is not visible in this excerpt.
    - Destination and source pixels are widened to float4.  Opacity is
      copied only when the matching matte flag is non-zero, otherwise it
      is set to 0.
    - compose is cast to CompositeOperator; the visible cases are
      ColorDodgeCompositeOp and BlendCompositeOp (the latter using
      source_dissolve and destination_dissolve as weights).
    - The result is clamped per channel and written back to image.

  NOTE(review): compositeImage is indexed with imageWidth rather than
  compositeWidth; presumably both images are required to have the same
  dimensions -- confirm against the caller.
*/
1078 void Composite(__global CLPixelType *image,
1079 const unsigned int imageWidth,
1080 const unsigned int imageHeight,
1081 const unsigned int imageMatte,
1082 const __global CLPixelType *compositeImage,
1083 const unsigned int compositeWidth,
1084 const unsigned int compositeHeight,
1085 const unsigned int compositeMatte,
1086 const unsigned int compose,
1087 const ChannelType channel,
1088 const float destination_dissolve,
1089 const float source_dissolve) {
1092 index.x = get_global_id(0);
1093 index.y = get_global_id(1);
1096 if (index.x >= imageWidth
1097 || index.y >= imageHeight) {
1100 const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
1102 setRedF4(&destination,getRed(inputPixel));
1103 setGreenF4(&destination,getGreen(inputPixel));
1104 setBlueF4(&destination,getBlue(inputPixel));
1107 const CLPixelType compositePixel
1108 = compositeImage[index.y*imageWidth+index.x];
1110 setRedF4(&source,getRed(compositePixel));
1111 setGreenF4(&source,getGreen(compositePixel));
1112 setBlueF4(&source,getBlue(compositePixel));
1114 if (imageMatte != 0) {
1115 setOpacityF4(&destination,getOpacity(inputPixel));
1118 setOpacityF4(&destination,0.0f);
1121 if (compositeMatte != 0) {
1122 setOpacityF4(&source,getOpacity(compositePixel));
1125 setOpacityF4(&source,0.0f);
1128 float4 composite=destination;
1130 CompositeOperator op = (CompositeOperator)compose;
1132 case ColorDodgeCompositeOp:
1133 CompositeColorDodge(&source,&destination,&composite);
1135 case BlendCompositeOp:
1136 MagickPixelCompositeBlend(&source,source_dissolve,&destination,
1137 destination_dissolve,&composite);
1144 CLPixelType outputPixel;
1145 setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
1146 setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
1147 setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
1148 setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
1149 image[index.y*imageWidth+index.x] = outputPixel;
/*
  ConvertRGBToHSB(): convert a pixel to hue / saturation / brightness,
  returned as a float3 with x = hue, y = saturation, z = brightness.
    saturation = (max - min) / max
    brightness = QuantumScale * max
    hue        = sector base (0, 2 or 4 depending on which channel is the
                 maximum) plus the difference of the other two channels
                 over delta, divided by 6
  All three outputs start at 0 and hue stays 0 when delta is 0.
  NOTE(review): a guard against max == 0 before the division, if any, is
  on lines not visible here -- confirm.
  NOTE(review): the final adjustment adds 1.0 when hue is NOT negative and
  0.0 when it is negative, which looks inverted for wrapping into [0, 1);
  ConvertHSBToRGB() uses hue - floor(hue), so the round trip may be
  unaffected -- confirm intent.
*/
1167 static inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1168 float3 HueSaturationBrightness;
1169 HueSaturationBrightness.x = 0.0f;
1170 HueSaturationBrightness.y = 0.0f;
1171 HueSaturationBrightness.z = 0.0f;
1173 float r=(float) getRed(pixel);
1174 float g=(float) getGreen(pixel);
1175 float b=(float) getBlue(pixel);
1177 float tmin=MagickMin(MagickMin(r,g),b);
1178 float tmax= MagickMax(MagickMax(r,g),b);
1181 float delta=tmax-tmin;
1182 HueSaturationBrightness.y=delta/tmax;
1183 HueSaturationBrightness.z=QuantumScale*tmax;
1185 if (delta != 0.0f) {
1186 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1187 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1188 HueSaturationBrightness.x/=6.0f;
1189 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1192 return HueSaturationBrightness;
/*
  ConvertHSBToRGB(): inverse of ConvertRGBToHSB(); input is a float3 with
  x = hue, y = saturation, z = brightness.
    - saturation == 0: red is the clamped QuantumRange*brightness and
      green / blue copy red.
    - otherwise: h = 6 * fractional part of hue selects a sector ih, and
      p, q, t are the usual intermediate levels built from brightness,
      saturation and the fraction f.  Each output channel picks one of
      the clamped brightness / p / q / t values according to ih.
  The declarations of rgb, ih and f, the default arm of each selection
  chain and the return are on lines not visible in this excerpt.
*/
1195 static inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1197 float hue = HueSaturationBrightness.x;
1198 float brightness = HueSaturationBrightness.z;
1199 float saturation = HueSaturationBrightness.y;
1203 if (saturation == 0.0f) {
1204 setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1205 setGreen(&rgb,getRed(rgb));
1206 setBlue(&rgb,getRed(rgb));
1210 float h=6.0f*(hue-floor(hue));
1212 float p=brightness*(1.0f-saturation);
1213 float q=brightness*(1.0f-saturation*f);
1214 float t=brightness*(1.0f-(saturation*(1.0f-f)));
1216 float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1217 float clamped_t = ClampToQuantum(QuantumRange*t);
1218 float clamped_p = ClampToQuantum(QuantumRange*p);
1219 float clamped_q = ClampToQuantum(QuantumRange*q);
1221 setRed(&rgb, (ih == 1)?clamped_q:
1222 (ih == 2 || ih == 3)?clamped_p:
1223 (ih == 4)?clamped_t:
1226 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1227 (ih == 3)?clamped_q:
1228 (ih == 4 || ih == 5)?clamped_p:
1231 setBlue(&rgb, (ih == 2)?clamped_t:
1232 (ih == 3 || ih == 4)?clampedBrightness:
1233 (ih == 5)?clamped_q:
/*
  Contrast(): in-place contrast adjustment, one work item per pixel.
    sharpen  non-zero increases contrast (sign = +1), zero decreases it
             (sign = -1)
  The pixel is converted to HSB, its brightness is moved half-way (times
  sign) toward the sinusoidal curve 0.5*(sinpi(b - 0.5) + 1), clamped to
  [0, 1], and converted back.  The w component of the original pixel is
  preserved.
  The row stride is get_global_size(0), so the global size in dimension 0
  is used as the image width.
*/
1239 __kernel
void Contrast(__global CLPixelType *im,
const unsigned int sharpen)
1242 const int sign = sharpen!=0?1:-1;
1243 const int x = get_global_id(0);
1244 const int y = get_global_id(1);
1245 const int columns = get_global_size(0);
1246 const int c = x + y * columns;
1248 CLPixelType pixel = im[c];
1249 float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1250 float brightness = HueSaturationBrightness.z;
1251 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1252 brightness = clamp(brightness,0.0f,1.0f);
1253 HueSaturationBrightness.z = brightness;
1255 CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1256 filteredPixel.w = pixel.w;
1257 im[c] = filteredPixel;
/*
  Histogram(): accumulate an intensity histogram, one work item per pixel.
  When the SyncChannels bit is set in channel, the pixel intensity is
  computed with GetPixelIntensity(), clamped, mapped to a bin with
  ScaleQuantumToMap(), and the third uint (index 2) of that uint4 bin is
  incremented atomically.
  The row stride is get_global_size(0).  The declaration of the method
  parameter is on a line not visible in this excerpt.
*/
1276 __kernel
void Histogram(__global CLPixelType * restrict im,
1277 const ChannelType channel,
1279 const int colorspace,
1280 __global uint4 * restrict histogram)
1282 const int x = get_global_id(0);
1283 const int y = get_global_id(1);
1284 const int columns = get_global_size(0);
1285 const int c = x + y * columns;
1286 if ((channel & SyncChannels) != 0)
1288 float intensity = GetPixelIntensity(method, colorspace,im[c]);
1289 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
1290 atomic_inc((__global uint *)(&(histogram[pos]))+2);
/*
  ContrastStretch(): remap each selected channel of a pixel through
  stretch_map, in place, one work item per pixel.
  For every channel whose bit is set in channel and whose white and black
  points differ, the current channel value is turned into a table index
  with ScaleQuantumToMap() and replaced by the same channel of the table
  entry.  The pixel is written back in (blue, green, red, opacity)
  component order.
  The load of oValue, the initial values of red / green / blue / opacity
  and the declaration of ePos are on lines not visible in this excerpt.
*/
1303 __kernel
void ContrastStretch(__global CLPixelType * restrict im,
1304 const ChannelType channel,
1305 __global CLPixelType * restrict stretch_map,
1306 const float4 white,
const float4 black)
1308 const int x = get_global_id(0);
1309 const int y = get_global_id(1);
1310 const int columns = get_global_size(0);
1311 const int c = x + y * columns;
1314 CLPixelType oValue, eValue;
1315 CLQuantum red, green, blue, opacity;
1320 if ((channel & RedChannel) != 0)
1322 if (getRedF4(white) != getRedF4(black))
1324 ePos = ScaleQuantumToMap(getRed(oValue));
1325 eValue = stretch_map[ePos];
1326 red = getRed(eValue);
1330 if ((channel & GreenChannel) != 0)
1332 if (getGreenF4(white) != getGreenF4(black))
1334 ePos = ScaleQuantumToMap(getGreen(oValue));
1335 eValue = stretch_map[ePos];
1336 green = getGreen(eValue);
1340 if ((channel & BlueChannel) != 0)
1342 if (getBlueF4(white) != getBlueF4(black))
1344 ePos = ScaleQuantumToMap(getBlue(oValue));
1345 eValue = stretch_map[ePos];
1346 blue = getBlue(eValue);
1350 if ((channel & OpacityChannel) != 0)
1352 if (getOpacityF4(white) != getOpacityF4(black))
1354 ePos = ScaleQuantumToMap(getOpacity(oValue));
1355 eValue = stretch_map[ePos];
1356 opacity = getOpacity(eValue);
1361 im[c]=(CLPixelType)(blue, green, red, opacity);
/*
  ConvolveOptimized(): 2-D convolution that stages its inputs in local
  memory.

  Staging: the work group covers a tile starting at imageAreaOrg.  The
  cached area is that tile grown by the filter size minus one in each
  dimension, starting half a filter above / left of the tile.  Work items
  stride through the cached area by the group size, clamping image
  coordinates with ClampToCanvas(), and copy pixels into pixelLocalCache.
  The filter weights are copied into filterCache the same way.  A
  local-memory barrier follows.

  Convolution: there is a test for work items whose pixel lies outside the
  image; its body is not visible here.  Two accumulation paths exist:
    - opacity channel not selected or matte == 0: plain weighted sum;
    - otherwise: weights are scaled by alpha = QuantumScale *
      (QuantumRange - p.w), and the color sum is finally multiplied by
      PerceptibleReciprocal(gamma).
  The accumulation statements themselves are on lines not visible in this
  excerpt.

  Output: x, y, z are the clamped sums; w is the clamped sum only when the
  opacity channel is selected, otherwise it is copied from the input.
*/
1380 void ConvolveOptimized(
const __global CLPixelType *input, __global CLPixelType *output,
1381 const unsigned int imageWidth,
const unsigned int imageHeight,
1382 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1383 const uint matte,
const ChannelType channel, __local CLPixelType *pixelLocalCache, __local
float* filterCache) {
1386 blockID.x = get_group_id(0);
1387 blockID.y = get_group_id(1);
1391 imageAreaOrg.x = blockID.x * get_local_size(0);
1392 imageAreaOrg.y = blockID.y * get_local_size(1);
1394 int2 midFilterDimen;
1395 midFilterDimen.x = (filterWidth-1)/2;
1396 midFilterDimen.y = (filterHeight-1)/2;
1398 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1401 int2 cachedAreaDimen;
1402 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1403 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1406 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1407 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1408 int groupSize = get_local_size(0) * get_local_size(1);
1409 for (
int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1411 int2 cachedAreaIndex;
1412 cachedAreaIndex.x = i % cachedAreaDimen.x;
1413 cachedAreaIndex.y = i / cachedAreaDimen.x;
1415 int2 imagePixelIndex;
1416 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1420 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1421 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1423 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1427 for (
int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1428 filterCache[i] = filter[i];
1430 barrier(CLK_LOCAL_MEM_FENCE);
1434 imageIndex.x = imageAreaOrg.x + get_local_id(0);
1435 imageIndex.y = imageAreaOrg.y + get_local_id(1);
1438 if (imageIndex.x >= imageWidth
1439 || imageIndex.y >= imageHeight) {
1443 int filterIndex = 0;
1444 float4 sum = (float4)0.0f;
1446 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1447 int cacheIndexY = get_local_id(1);
1448 for (
int j = 0; j < filterHeight; j++) {
1449 int cacheIndexX = get_local_id(0);
1450 for (
int i = 0; i < filterWidth; i++) {
1451 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1452 float f = filterCache[filterIndex];
1467 int cacheIndexY = get_local_id(1);
1468 for (
int j = 0; j < filterHeight; j++) {
1469 int cacheIndexX = get_local_id(0);
1470 for (
int i = 0; i < filterWidth; i++) {
1472 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1473 float alpha = QuantumScale*(QuantumRange-p.w);
1474 float f = filterCache[filterIndex];
1475 float g = alpha * f;
1488 gamma = PerceptibleReciprocal(gamma);
1489 sum.xyz = gamma*sum.xyz;
1491 CLPixelType outputPixel;
1492 outputPixel.x = ClampToQuantum(sum.x);
1493 outputPixel.y = ClampToQuantum(sum.y);
1494 outputPixel.z = ClampToQuantum(sum.z);
1495 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1497 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
/*
  Convolve(): 2-D convolution reading directly from global memory, one
  work item per output pixel at (global id 0, global id 1).

  There is a test for coordinates outside the image; its body is not
  visible here.  For every filter tap the source coordinate is the output
  coordinate shifted by the tap offset minus half the filter size, clamped
  to the canvas with ClampToCanvas().  Two accumulation paths exist:
    - opacity channel not selected or matte == 0: plain weighted sum;
    - otherwise: weights are scaled by alpha = QuantumScale *
      (QuantumRange - p.w), and the color sum is finally multiplied by
      PerceptibleReciprocal(gamma).
  The accumulation statements themselves are on lines not visible in this
  excerpt.

  Output: x, y, z are the clamped sums; w is the clamped sum only when the
  opacity channel is selected, otherwise it is copied from the input.
*/
1503 void Convolve(
const __global CLPixelType *input, __global CLPixelType *output,
1504 const uint imageWidth,
const uint imageHeight,
1505 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1506 const uint matte,
const ChannelType channel) {
1509 imageIndex.x = get_global_id(0);
1510 imageIndex.y = get_global_id(1);
1516 if (imageIndex.x >= imageWidth
1517 || imageIndex.y >= imageHeight)
1520 int2 midFilterDimen;
1521 midFilterDimen.x = (filterWidth-1)/2;
1522 midFilterDimen.y = (filterHeight-1)/2;
1524 int filterIndex = 0;
1525 float4 sum = (float4)0.0f;
1527 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1528 for (
int j = 0; j < filterHeight; j++) {
1529 int2 inputPixelIndex;
1530 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1531 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1532 for (
int i = 0; i < filterWidth; i++) {
1533 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1534 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1536 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1537 float f = filter[filterIndex];
1552 for (
int j = 0; j < filterHeight; j++) {
1553 int2 inputPixelIndex;
1554 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1555 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1556 for (
int i = 0; i < filterWidth; i++) {
1557 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1558 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1560 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1561 float alpha = QuantumScale*(QuantumRange-p.w);
1562 float f = filter[filterIndex];
1563 float g = alpha * f;
1576 gamma = PerceptibleReciprocal(gamma);
1577 sum.xyz = gamma*sum.xyz;
1580 CLPixelType outputPixel;
1581 outputPixel.x = ClampToQuantum(sum.x);
1582 outputPixel.y = ClampToQuantum(sum.y);
1583 outputPixel.z = ClampToQuantum(sum.z);
1584 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1586 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
/*
  HullPass1(): first hull pass, one work item per pixel.

  The pixel v at (x, y) is compared with its neighbour r at (x, y) +
  offset.  The neighbour is read from inputImage only when the shifted
  coordinate is unchanged by ClampToCanvas(), i.e. inside the image; the
  fallback value is on a line not visible here.

  Two 4-component loops are visible:
    - raise sv[i] by ScaleCharToQuantum(1) when sr[i] is at least
      sv[i] + ScaleCharToQuantum(2);
    - lower sv[i] by ScaleCharToQuantum(1) when sr[i] is at most
      sv[i] - ScaleCharToQuantum(2).
  NOTE(review): presumably polarity selects between the two loops and
  matte gates the write of v.w; those conditions are not visible --
  confirm.
  NOTE(review): no bounds check on x / y against imageWidth / imageHeight
  is visible -- confirm the launch grid matches the image size.
*/
1604 __kernel
void HullPass1(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1605 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1606 ,
const int2 offset,
const int polarity,
const int matte) {
1608 int x = get_global_id(0);
1609 int y = get_global_id(1);
1611 CLPixelType v = inputImage[y*imageWidth+x];
1614 neighbor.y = y + offset.y;
1615 neighbor.x = x + offset.x;
1617 int2 clampedNeighbor;
1618 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1619 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1621 CLPixelType r = (clampedNeighbor.x == neighbor.x
1622 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1638 \n #pragma unroll 4\n
1639 for (
unsigned int i = 0; i < 4; i++) {
1640 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1644 \n #pragma unroll 4\n
1645 for (
unsigned int i = 0; i < 4; i++) {
1646 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1651 v.x = (CLQuantum)sv[0];
1652 v.y = (CLQuantum)sv[1];
1653 v.z = (CLQuantum)sv[2];
1656 v.w = (CLQuantum)sv[3];
1658 outputImage[y*imageWidth+x] = v;
/*
  HullPass2(): second hull pass, one work item per pixel.

  The pixel v at (x, y) is compared with two neighbours: r at (x, y) +
  offset and s at (x, y) - offset.  Each neighbour is read from inputImage
  only when its coordinate is unchanged by ClampToCanvas(); the fallback
  values are on lines not visible here.

  Two 4-component loops are visible:
    - keep sv[i] when ss[i] < sv[i] + ScaleCharToQuantum(2) or
      sr[i] <= sv[i]; otherwise raise it by ScaleCharToQuantum(1);
    - keep sv[i] when ss[i] > sv[i] - ScaleCharToQuantum(2) or
      sr[i] >= sv[i]; otherwise lower it by ScaleCharToQuantum(1).
  NOTE(review): presumably polarity selects between the two loops and
  matte gates the write of v.w; those conditions are not visible --
  confirm.
*/
1669 __kernel
void HullPass2(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1670 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1671 ,
const int2 offset,
const int polarity,
const int matte) {
1673 int x = get_global_id(0);
1674 int y = get_global_id(1);
1676 CLPixelType v = inputImage[y*imageWidth+x];
1678 int2 neighbor, clampedNeighbor;
1680 neighbor.y = y + offset.y;
1681 neighbor.x = x + offset.x;
1682 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1683 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1685 CLPixelType r = (clampedNeighbor.x == neighbor.x
1686 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1690 neighbor.y = y - offset.y;
1691 neighbor.x = x - offset.x;
1692 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1693 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1695 CLPixelType s = (clampedNeighbor.x == neighbor.x
1696 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1719 \n #pragma unroll 4\n
1720 for (
unsigned int i = 0; i < 4; i++) {
1725 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1729 \n #pragma unroll 4\n
1730 for (
unsigned int i = 0; i < 4; i++) {
1734 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1738 v.x = (CLQuantum)sv[0];
1739 v.y = (CLQuantum)sv[1];
1740 v.z = (CLQuantum)sv[2];
1743 v.w = (CLQuantum)sv[3];
1745 outputImage[y*imageWidth+x] = v;
/*
  Equalize(): remap a pixel through equalize_map, in place, one work item
  per pixel.
  In the visible branch (SyncChannels bit set in channel, and the red
  white and black points differ) every channel value is turned into a
  table index with ScaleQuantumToMap() and replaced by the RED component
  of the table entry, so all four channels share one mapping curve.  The
  pixel is written back in (blue, green, red, opacity) component order.
  The load of oValue, the initial values of red / green / blue / opacity,
  the declaration of ePos and any non-sync branch are on lines not visible
  in this excerpt.
*/
1767 __kernel
void Equalize(__global CLPixelType * restrict im,
1768 const ChannelType channel,
1769 __global CLPixelType * restrict equalize_map,
1770 const float4 white,
const float4 black)
1772 const int x = get_global_id(0);
1773 const int y = get_global_id(1);
1774 const int columns = get_global_size(0);
1775 const int c = x + y * columns;
1778 CLPixelType oValue, eValue;
1779 CLQuantum red, green, blue, opacity;
1784 if ((channel & SyncChannels) != 0)
1786 if (getRedF4(white) != getRedF4(black))
1788 ePos = ScaleQuantumToMap(getRed(oValue));
1789 eValue = equalize_map[ePos];
1790 red = getRed(eValue);
1791 ePos = ScaleQuantumToMap(getGreen(oValue));
1792 eValue = equalize_map[ePos];
1793 green = getRed(eValue);
1794 ePos = ScaleQuantumToMap(getBlue(oValue));
1795 eValue = equalize_map[ePos];
1796 blue = getRed(eValue);
1797 ePos = ScaleQuantumToMap(getOpacity(oValue));
1798 eValue = equalize_map[ePos];
1799 opacity = getRed(eValue);
1802 im[c]=(CLPixelType)(blue, green, red, opacity);
/*
  ApplyFunction: evaluate the selected MagickFunction on all four channels of
  pixel and return the result clamped with ClampToQuantum().

    pixel              input pixel (channels x, y, z, w)
    function           which function to evaluate
    number_parameters  number of valid entries in parameters
    parameters         function arguments; missing entries fall back to the
                       defaults spelled out in each case below
*/
1830 CLPixelType ApplyFunction(CLPixelType pixel,
const MagickFunction function,
1831 const unsigned int number_parameters,
1832 __constant
float *parameters)
1834 float4 result = (float4) 0.0f;
1837 case PolynomialFunction:
/* Horner evaluation on the pixel scaled by QuantumScale; parameters[0] is
   the highest-order coefficient. */
1839 for (
unsigned int i=0; i < number_parameters; i++)
1840 result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
1841 result *= (float4)QuantumRange;
1844 case SinusoidFunction:
/* Defaults: freq 1, phase 0, ampl 0.5, bias 0.5.  phase is divided by 360
   before being added to the sine argument. */
1846 float freq,phase,ampl,bias;
1847 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1848 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1849 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1850 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1851 result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
1852 (freq*QuantumScale*(
float)pixel.x + phase/360.0f)) + bias);
1853 result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
1854 (freq*QuantumScale*(
float)pixel.y + phase/360.0f)) + bias);
1855 result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
1856 (freq*QuantumScale*(
float)pixel.z + phase/360.0f)) + bias);
1857 result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
1858 (freq*QuantumScale*(
float)pixel.w + phase/360.0f)) + bias);
1861 case ArcsinFunction:
/* Defaults: width 1, center 0.5, range 1, bias 0.5.  Each channel is
   processed independently with the same four steps. */
1863 float width,range,center,bias;
1864 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1865 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1866 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1867 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1869 result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
1870 result.x = range/MagickPI*asin(result.x)+bias;
1871 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
1872 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
1874 result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
1875 result.y = range/MagickPI*asin(result.y)+bias;
1876 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
1877 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
1879 result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
1880 result.z = range/MagickPI*asin(result.z)+bias;
/* The z channel must fall back to its own value, like x, y and w do. */
1881 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.z;
1882 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.z;
1885 result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
1886 result.w = range/MagickPI*asin(result.w)+bias;
1887 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
1888 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
1890 result *= (float4)QuantumRange;
1893 case ArctanFunction:
/* Defaults: slope 1, center 0.5, range 1, bias 0.5.  Evaluated on the
   whole float4 at once. */
1895 float slope,range,center,bias;
1896 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1897 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1898 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1899 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1900 result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
1901 result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
1904 case UndefinedFunction:
/* Clamp every channel back into the quantum range. */
1907 return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
1908 ClampToQuantum(result.z), ClampToQuantum(result.w));
/*
  ComputeFunction kernel: one work-item per pixel.  Replaces im[c] in place
  with ApplyFunction(im[c], ...), where c = x + y * columns and columns is
  the global size of dimension 0.  The channel argument is not used in the
  visible statements.
*/
1920 __kernel
void ComputeFunction(__global CLPixelType *im,
1921 const ChannelType channel,
const MagickFunction function,
1922 const unsigned int number_parameters, __constant
float *parameters)
1924 const int x = get_global_id(0);
1925 const int y = get_global_id(1);
1926 const int columns = get_global_size(0);
1927 const int c = x + y * columns;
1928 im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
/*
  Grayscale kernel: one work-item per pixel.  Computes a single intensity
  from the red, green and blue channels according to method, writes it with
  setGray() and keeps the original w channel.  The image is updated in place.
*/
1945 __kernel
void Grayscale(__global CLPixelType *im,
1946 const int method,
const int colorspace)
1949 const int x = get_global_id(0);
1950 const int y = get_global_id(1);
1951 const int columns = get_global_size(0);
1952 const int c = x + y * columns;
1954 CLPixelType pixel = im[c];
1962 red=(float)getRed(pixel);
1963 green=(float)getGreen(pixel);
1964 blue=(float)getBlue(pixel);
1968 CLPixelType filteredPixel;
/* Arithmetic mean of the three channels. */
1972 case AveragePixelIntensityMethod:
1974 intensity=(red+green+blue)/3.0;
/* Largest of the three channels. */
1977 case BrightnessPixelIntensityMethod:
1979 intensity=MagickMax(MagickMax(red,green),blue);
/* Midpoint between the smallest and the largest channel. */
1982 case LightnessPixelIntensityMethod:
1984 intensity=(MagickMin(MagickMin(red,green),blue)+
1985 MagickMax(MagickMax(red,green),blue))/2.0;
/* Sum of squares divided by 3*QuantumRange. */
1988 case MSPixelIntensityMethod:
1990 intensity=(float) (((
float) red*red+green*green+
1991 blue*blue)/(3.0*QuantumRange));
/* Both Rec601 cases use the same weights in the statements shown here. */
1994 case Rec601LumaPixelIntensityMethod:
2004 intensity=0.298839*red+0.586811*green+0.114350*blue;
2007 case Rec601LuminancePixelIntensityMethod:
2017 intensity=0.298839*red+0.586811*green+0.114350*blue;
/* Both Rec709 cases use the same weights in the statements shown here. */
2020 case Rec709LumaPixelIntensityMethod:
2031 intensity=0.212656*red+0.715158*green+0.072186*blue;
2034 case Rec709LuminancePixelIntensityMethod:
2044 intensity=0.212656*red+0.715158*green+0.072186*blue;
/* Square root of the sum of squares, divided by sqrt(3). */
2047 case RMSPixelIntensityMethod:
2049 intensity=(float) (sqrt((
float) red*red+green*green+
2050 blue*blue)/sqrt(3.0));
2056 setGray(&filteredPixel, ClampToQuantum(intensity));
/* The w channel is copied through unchanged. */
2058 filteredPixel.w = pixel.w;
2060 im[c] = filteredPixel;
/*
  mirrorBottom: reflect a negative index about zero (returns -value when
  value < 0, otherwise value unchanged).
*/
2077 static inline int mirrorBottom(
int value)
2079 return (value < 0) ? - (value) : value;
/*
  mirrorTop: reflect an index that runs past the upper edge back into range
  (returns 2*width - value - 1 when value >= width, otherwise value
  unchanged).
*/
2081 static inline int mirrorTop(
int value,
int width)
2083 return (value >= width) ? (2 * width - value - 1) : value;
/*
  LocalContrastBlurRow kernel: horizontal pass.  For row y = get_global_id(1)
  the work-items of a group stride across the row (start get_local_id(0),
  step get_local_size(0)) and write one weighted sum per column into
  tmpImage.  Each sample is the dot product of the pixel with the RGB weight
  vector below; indices outside the row are mirrored with mirrorBottom() and
  mirrorTop().  dstImage is not referenced in the visible statements.
*/
2086 __kernel
void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *tmpImage,
2088 const int imageWidth,
2089 const int imageHeight)
/* Channel weights; the fourth component is weighted zero. */
2091 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
2093 int x = get_local_id(0);
2094 int y = get_global_id(1);
2096 if ((x >= imageWidth) || (y >= imageHeight))
/* Start of the current source row. */
2099 global CLPixelType *src = srcImage + y * imageWidth;
2101 for (
int i = x; i < imageWidth; i += get_local_size(0)) {
2103 float weight = 1.0f;
/* Samples before column i, eight at a time, with weight growing by k. */
2106 while ((j + 7) < i) {
2107 for (
int k = 0; k < 8; ++k)
2108 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
2113 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
/* Samples up to radius + i, eight at a time, with weight shrinking by k. */
2118 while ((j + 7) < radius + i) {
2119 for (
int k = 0; k < 8; ++k)
2120 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
/* Remaining samples that did not fill a group of eight. */
2124 while (j < radius + i) {
2125 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
/* Normalise by (radius + 1) squared. */
2130 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
/*
  LocalContrastBlurApplyColumn kernel: vertical pass plus application.  One
  work-item per pixel (x, y).  It accumulates a weighted sum down column x of
  blurImage, mirroring rows outside the image, then scales the x, y and z
  channels of the source pixel by a multiplier derived from the difference
  between the pixel value and the normalised sum, and stores the pixel in
  dstImage.
*/
2136 __kernel
void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *blurImage,
2138 const float strength,
2139 const int imageWidth,
2140 const int imageHeight)
/* Channel weights; the fourth component is weighted zero. */
2142 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
2144 int x = get_global_id(0);
2145 int y = get_global_id(1);
2147 if ((x >= imageWidth) || (y >= imageHeight))
/* Column x of the blur buffer; rows are imageWidth elements apart. */
2150 global
float *src = blurImage + x;
2153 float weight = 1.0f;
/* Rows before y, eight at a time, with weight growing by k. */
2156 while ((j + 7) < y) {
2157 for (
int k = 0; k < 8; ++k)
2158 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
2163 sum += weight * src[mirrorBottom(j) * imageWidth];
/* Rows up to radius + y, eight at a time, with weight shrinking by k. */
2168 while ((j + 7) < radius + y) {
2169 for (
int k = 0; k < 8; ++k)
2170 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
/* Remaining rows that did not fill a group of eight. */
2174 while (j < radius + y) {
2175 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
2180 CLPixelType pixel = srcImage[x + y * imageWidth];
2181 float srcVal = dot(RGB, convert_float4(pixel));
/* strength is divided by 100 here. */
2182 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
/* NOTE(review): srcVal is zero when x, y and z are all zero, so this divides
   by zero for such pixels; confirm the intended handling. */
2183 mult = (srcVal + mult) / srcVal;
/* Only x, y and z are rescaled; w keeps the source value. */
2185 pixel.x = ClampToQuantum(pixel.x * mult);
2186 pixel.y = ClampToQuantum(pixel.y * mult);
2187 pixel.z = ClampToQuantum(pixel.z * mult);
2189 dstImage[x + y * imageWidth] = pixel;
/*
  ConvertRGBToHSL: convert quantum red, green, blue to hue, saturation and
  lightness, written through the three output pointers.  The channels are
  first scaled by QuantumScale.
*/
2207 static inline void ConvertRGBToHSL(
const CLQuantum red,
const CLQuantum green,
const CLQuantum blue,
2208 float *hue,
float *saturation,
float *lightness)
/* Largest and smallest of the scaled channels. */
2218 tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
2219 tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
2223 *lightness=(tmax+tmin)/2.0;
/* Hue depends on which channel holds the maximum. */
2231 if (tmax == (QuantumScale*red))
2233 *hue=(QuantumScale*green-QuantumScale*blue)/c;
2234 if ((QuantumScale*green) < (QuantumScale*blue))
2238 if (tmax == (QuantumScale*green))
2239 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
2241 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
/* Saturation uses a different denominator above and below lightness 0.5. */
2244 if (*lightness <= 0.5)
2245 *saturation=c/(2.0*(*lightness));
2247 *saturation=c/(2.0-2.0*(*lightness));
/*
  ConvertHSLToRGB: convert hue, saturation and lightness to quantum red,
  green and blue, written through the three output pointers after scaling by
  QuantumRange and clamping.
*/
2250 static inline void ConvertHSLToRGB(
const float hue,
const float saturation,
const float lightness,
2251 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
/* c is computed differently above and below lightness 0.5. */
2266 if (lightness <= 0.5)
2267 c=2.0*lightness*saturation;
2269 c=(2.0-2.0*lightness)*saturation;
2270 tmin=lightness-0.5*c;
/* Wrap h into [0, 360). */
2271 h-=360.0*floor(h/360.0);
2273 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
/* Select one of six sectors from the integer part of h. */
2274 switch ((
int) floor(h) % 6)
2320 *red=ClampToQuantum(QuantumRange*r);
2321 *green=ClampToQuantum(QuantumRange*g);
2322 *blue=ClampToQuantum(QuantumRange*b);
/*
  ModulateHSL: adjust a colour in place.  Converts red, green, blue to HSL,
  offsets the hue, scales saturation and lightness by their percentages
  (each multiplied by 0.01), and converts back.
*/
2325 static inline void ModulateHSL(
const float percent_hue,
const float percent_saturation,
const float percent_lightness,
2326 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2336 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
/* percent_hue of 100 leaves the hue unchanged (the offset is zero). */
2337 hue+=0.5*(0.01*percent_hue-1.0);
2342 saturation*=0.01*percent_saturation;
2343 lightness*=0.01*percent_lightness;
2344 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
/*
  Modulate kernel: one work-item per pixel.  Passes the colour channels of
  im[c] through ModulateHSL() and writes the result back in place, keeping
  the original w channel.  percent_brightness is passed to ModulateHSL() as
  its lightness percentage.
*/
2347 __kernel
void Modulate(__global CLPixelType *im,
2348 const float percent_brightness,
2349 const float percent_hue,
2350 const float percent_saturation,
2351 const int colorspace)
2354 const int x = get_global_id(0);
2355 const int y = get_global_id(1);
2356 const int columns = get_global_size(0);
2357 const int c = x + y * columns;
2359 CLPixelType pixel = im[c];
2367 green=getGreen(pixel);
2368 blue=getBlue(pixel);
/* Argument order is hue, saturation, brightness. */
2375 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2376 &red, &green, &blue);
2381 CLPixelType filteredPixel;
2383 setRed(&filteredPixel, red);
2384 setGreen(&filteredPixel, green);
2385 setBlue(&filteredPixel, blue);
/* The w channel is copied through unchanged. */
2386 filteredPixel.w = pixel.w;
2388 im[c] = filteredPixel;
/*
  MotionBlur: one work-item per output pixel.  Accumulates width samples of
  input taken at currentPixel + offset[i], each weighted by filter[i], with
  sample coordinates clamped to the canvas by ClampToCanvas().  Two paths
  exist: a plain weighted sum of all four channels, and a path that weights
  x, y, z by an alpha derived from the w channel of each sample.
*/
2406 void MotionBlur(
const __global CLPixelType *input, __global CLPixelType *output,
2407 const unsigned int imageWidth,
const unsigned int imageHeight,
2408 const __global
float *filter,
const unsigned int width,
const __global int2* offset,
2410 const ChannelType channel,
const unsigned int matte) {
2413 currentPixel.x = get_global_id(0);
2414 currentPixel.y = get_global_id(1);
/* Work-items outside the image are filtered out by this test. */
2416 if (currentPixel.x >= imageWidth
2417 || currentPixel.y >= imageHeight)
/* The accumulator starts from bias. */
2421 pixel.x = (float)bias.x;
2422 pixel.y = (float)bias.y;
2423 pixel.z = (float)bias.z;
2424 pixel.w = (float)bias.w;
/* Plain path: OpacityChannel not selected, or matte is zero. */
2426 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2428 for (
int i = 0; i < width; i++) {
2431 int2 samplePixel = currentPixel + offset[i];
2432 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2433 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2434 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2436 pixel.x += (filter[i] * (float)samplePixelValue.x);
2437 pixel.y += (filter[i] * (float)samplePixelValue.y);
2438 pixel.z += (filter[i] * (float)samplePixelValue.z);
2439 pixel.w += (filter[i] * (float)samplePixelValue.w);
2442 CLPixelType outputPixel;
2443 outputPixel.x = ClampToQuantum(pixel.x);
2444 outputPixel.y = ClampToQuantum(pixel.y);
2445 outputPixel.z = ClampToQuantum(pixel.z);
2446 outputPixel.w = ClampToQuantum(pixel.w);
2447 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/* Alpha-weighted path. */
2452 for (
int i = 0; i < width; i++) {
2455 int2 samplePixel = currentPixel + offset[i];
2456 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2457 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2459 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
/* alpha is QuantumScale * (QuantumRange - w) of the sample. */
2461 float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
2462 float k = filter[i];
2463 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2464 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2465 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2467 pixel.w += k * alpha * samplePixelValue.w;
/* x, y, z are rescaled by the reciprocal of gamma. */
2471 gamma = PerceptibleReciprocal(gamma);
2472 pixel.xyz = gamma*pixel.xyz;
2474 CLPixelType outputPixel;
2475 outputPixel.x = ClampToQuantum(pixel.x);
2476 outputPixel.y = ClampToQuantum(pixel.y);
2477 outputPixel.z = ClampToQuantum(pixel.z);
2478 outputPixel.w = ClampToQuantum(pixel.w);
2479 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/*
  RadialBlur kernel: one work-item per pixel; columns and rows are taken from
  the global work size.  The pixel offset from blurCenter is rotated by each
  sampled (cos_theta[i], sin_theta[i]) pair, the rotated position is clamped
  to the canvas, and the samples are accumulated.  The stride through the
  angle tables (step) is derived from the distance to the blur centre.
*/
2497 __kernel
void RadialBlur(
const __global CLPixelType *im, __global CLPixelType *filtered_im,
2499 const unsigned int channel,
const unsigned int matte,
2500 const float2 blurCenter,
2501 __constant
float *cos_theta, __constant
float *sin_theta,
2502 const unsigned int cossin_theta_size)
2504 const int x = get_global_id(0);
2505 const int y = get_global_id(1);
2506 const int columns = get_global_size(0);
2507 const int rows = get_global_size(1);
2508 unsigned int step = 1;
/* Offset of this pixel from the blur centre and its length. */
2509 float center_x = (float) x - blurCenter.x;
2510 float center_y = (float) y - blurCenter.y;
2511 float radius = hypot(center_x, center_y);
2514 float blur_radius = hypot(blurCenter.x, blurCenter.y);
2516 if (radius > MagickEpsilon)
2518 step = (
unsigned int) (blur_radius / radius);
/* Keep step below the table size. */
2521 if (step >= cossin_theta_size)
2522 step = cossin_theta_size-1;
/* The accumulator starts from bias. */
2526 result.x = (float)bias.x;
2527 result.y = (float)bias.y;
2528 result.z = (float)bias.z;
2529 result.w = (float)bias.w;
2530 float normalize = 0.0f;
/* Plain path: OpacityChannel not selected, or matte is zero. */
2532 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2533 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
2535 result += convert_float4(im[
2536 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2537 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2540 normalize = PerceptibleReciprocal(normalize);
2541 result = result * normalize;
/* Alpha-weighted path. */
2545 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
2547 float4 p = convert_float4(im[
2548 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2549 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
/* alpha is QuantumScale * (QuantumRange - w) of the sample. */
2551 float alpha = (float)(QuantumScale*(QuantumRange-p.w));
2552 result.x += alpha * p.x;
2553 result.y += alpha * p.y;
2554 result.z += alpha * p.z;
/* x, y, z are scaled by 1/gamma, w by 1/normalize. */
2559 gamma = PerceptibleReciprocal(gamma);
2560 normalize = PerceptibleReciprocal(normalize);
2561 result.x = gamma*result.x;
2562 result.y = gamma*result.y;
2563 result.z = gamma*result.z;
2564 result.w = normalize*result.w;
2566 filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
2567 ClampToQuantum(result.z), ClampToQuantum(result.w));
/*
  BoxResizeFilter: weighting function that applyResizeFilter() calls for
  BoxWeightingFunction.
*/
2585 float BoxResizeFilter(
const float x)
/*
  CubicBC: piecewise cubic weighting function.  The polynomial coefficients
  are read from resizeFilterCoefficients: entries 0..2 for one piece (no
  linear term) and entries 3..6 for the other.
*/
2593 float CubicBC(
const float x,
const __global
float* resizeFilterCoefficients)
/* c0 + c1*x^2 + c2*x^3 */
2625 return(resizeFilterCoefficients[0]+x*(x*
2626 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
/* c3 + c4*x + c5*x^2 + c6*x^3 */
2628 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2629 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
/*
  Sinc: returns sinpi(x) / (MagickPI * x).
*/
2635 float Sinc(
const float x)
2639 const float alpha=(float) (MagickPI*x);
2640 return sinpi(x)/alpha;
/*
  Triangle: returns 1 - x for x < 1 and 0 otherwise.
*/
2647 float Triangle(
const float x)
2654 return ((x<1.0f)?(1.0f-x):0.0f);
/*
  Hanning: returns 0.5 + 0.5 * cos(MagickPI * x).
*/
2660 float Hanning(
const float x)
2666 const float cosine=cos((MagickPI*x));
2667 return(0.5f+0.5f*cosine);
/*
  Hamming: returns 0.54 + 0.46 * cos(MagickPI * x).
*/
2672 float Hamming(
const float x)
2678 const float cosine=cos((MagickPI*x));
2679 return(0.54f+0.46f*cosine);
/*
  Blackman: returns 0.34 + cosine * (0.5 + 0.16 * cosine), with
  cosine = cos(MagickPI * x).
*/
2684 float Blackman(
const float x)
2693 const float cosine=cos((MagickPI*x));
2694 return(0.34f+cosine*(0.5f+cosine*0.16f));
/*
  applyResizeFilter: dispatch on filterType to the matching weighting
  function and return its value at x.  filterCoefficients is forwarded to
  CubicBC() only.
*/
2702 static inline float applyResizeFilter(
const float x,
const ResizeWeightingFunctionType filterType,
const __global
float* filterCoefficients)
/* The two Sinc variants share one case body. */
2708 case SincWeightingFunction:
2709 case SincFastWeightingFunction:
2711 case CubicBCWeightingFunction:
2712 return CubicBC(x,filterCoefficients);
2713 case BoxWeightingFunction:
2714 return BoxResizeFilter(x);
2715 case TriangleWeightingFunction:
2717 case HanningWeightingFunction:
2719 case HammingWeightingFunction:
2721 case BlackmanWeightingFunction:
/*
  getResizeFilterWeight: weight of a sample at distance x.  The distance is
  divided by resizeFilterBlur and made non-negative, then the weight is the
  product of a window term (scale) and the filter function evaluated at the
  blurred distance.
*/
2732 static inline float getResizeFilterWeight(
const __global
float* resizeFilterCubicCoefficients,
const ResizeWeightingFunctionType resizeFilterType
2733 ,
const ResizeWeightingFunctionType resizeWindowType
2734 ,
const float resizeFilterScale,
const float resizeWindowSupport,
const float resizeFilterBlur,
const float x)
2737 float xBlur = fabs(x/resizeFilterBlur);
/* Special case: negligible window support or a box window. */
2738 if (resizeWindowSupport < MagickEpsilon
2739 || resizeWindowType == BoxWeightingFunction)
/* Window term: window function evaluated at xBlur * resizeFilterScale. */
2745 scale = resizeFilterScale;
2746 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2748 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2755 const char* accelerateKernels2 =
/*
  getNumWorkItemsPerPixel: number of work-items assigned to each pixel,
  computed as the integer quotient numWorkItems / pixelPerWorkgroup.
*/
2759 static inline unsigned int getNumWorkItemsPerPixel(
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
2760 return (numWorkItems/pixelPerWorkgroup);
/*
  pixelToCompute: map a work-item id to the index of the pixel it works on.
  The index is itemID divided by the number of work-items per pixel; items
  whose index would reach pixelPerWorkgroup get -1 instead.
*/
2765 static inline int pixelToCompute(
const unsigned itemID,
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
2766 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2767 int pixelIndex = itemID/numWorkItemsPerPixel;
/* -1 marks a work-item with no pixel assigned. */
2768 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
/*
  ResizeHorizontalFilter kernel (required work-group size 256x1x1).

  Each work-group produces up to pixelPerWorkgroup consecutive output columns
  of row y = get_global_id(1).  The needed span of the input row is copied
  into the __local buffer inputImageCache, then the output columns are
  processed in chunks of pixelChunkSize.  Within a chunk several work-items
  share one output pixel: each accumulates part of the filter taps, the
  partial sums are combined in the __local caches outputPixelCache,
  densityCache and gammaCache, and the first work-items of the group write
  the final pixels to filteredImage.
*/
2775 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2776 void ResizeHorizontalFilter(
const __global CLPixelType* inputImage,
const unsigned int inputColumns,
const unsigned int inputRows,
const unsigned int matte
2777 ,
const float xFactor, __global CLPixelType* filteredImage,
const unsigned int filteredColumns,
const unsigned int filteredRows
2778 ,
const int resizeFilterType,
const int resizeWindowType
2779 ,
const __global
float* resizeFilterCubicCoefficients
2780 ,
const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
const float resizeFilterBlur
2781 , __local CLPixelType* inputImageCache,
const int numCachedPixels,
const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize
2782 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
/* Output column range [startX, stopX) handled by this work-group. */
2786 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2787 const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2788 const unsigned int actualNumPixelToCompute = stopX - startX;
/* scale is at least 1; support is at least 0.5; scale is then inverted. */
2791 float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2792 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2793 scale = PerceptibleReciprocal(scale);
/* Input column range to cache, limited to numCachedPixels and the row. */
2795 const int cacheRangeStartX = MagickMax((
int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(
int)(0));
2796 const int cacheRangeEndX = MagickMin((
int)(cacheRangeStartX + numCachedPixels), (
int)inputColumns);
/* Copy the input span of row y into local memory and wait for it. */
2799 const unsigned int y = get_global_id(1);
2800 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
2801 wait_group_events(1,&e);
/* Number of chunks, rounded up. */
2803 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2804 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2807 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2808 const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
2809 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
/* numItems work-items cooperate on each pixel of the chunk. */
2812 const unsigned int itemID = get_local_id(0);
2813 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2815 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2817 float4 filteredPixel = (float4)0.0f;
2818 float density = 0.0f;
/* Work-items without an assigned pixel skip the accumulation. */
2821 if (pixelIndex != -1) {
2824 const int x = chunkStartX + pixelIndex;
/* Input columns [start, stop) that contribute to output column x. */
2827 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2828 const unsigned int start = (
unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2829 const unsigned int stop = (
unsigned int)MagickMin(bisect+support+0.5f,(
float)inputColumns);
2830 const unsigned int n = stop - start;
/* Taps per work-item, rounded up. */
2833 unsigned int numStepsPerWorkItem = n / numItems;
2834 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2836 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2837 if (startStep < n) {
2838 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
/* Index into the local cache of the first tap for this work-item. */
2840 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
/* Plain accumulation: all four channels weighted equally. */
2843 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2844 float4 cp = convert_float4(inputImageCache[cacheIndex]);
2846 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2847 , (ResizeWeightingFunctionType)resizeWindowType
2848 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2850 filteredPixel += ((float4)weight)*cp;
/* Alpha-weighted accumulation: x, y, z use weight * alpha, w uses weight. */
2857 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2858 CLPixelType p = inputImageCache[cacheIndex];
2860 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2861 , (ResizeWeightingFunctionType)resizeWindowType
2862 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2864 float alpha = weight * QuantumScale * GetPixelAlpha(p);
2865 float4 cp = convert_float4(p);
2867 filteredPixel.x += alpha * cp.x;
2868 filteredPixel.y += alpha * cp.y;
2869 filteredPixel.z += alpha * cp.z;
2870 filteredPixel.w += weight * cp.w;
/* Clear the shared accumulators for this chunk. */
2880 if (itemID < actualNumPixelInThisChunk) {
2881 outputPixelCache[itemID] = (float4)0.0f;
2882 densityCache[itemID] = 0.0f;
2884 gammaCache[itemID] = 0.0f;
2886 barrier(CLK_LOCAL_MEM_FENCE);
/* Work-items sharing a pixel add their partial sums one turn at a time. */
2889 for (
unsigned int i = 0; i < numItems; i++) {
2890 if (pixelIndex != -1) {
2891 if (itemID%numItems == i) {
2892 outputPixelCache[pixelIndex]+=filteredPixel;
2893 densityCache[pixelIndex]+=density;
2895 gammaCache[pixelIndex]+=gamma;
2899 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per pixel of the chunk writes the final value. */
2902 if (itemID < actualNumPixelInThisChunk) {
/* Plain output: normalise by density unless it is 0 or 1. */
2904 float density = densityCache[itemID];
2905 float4 filteredPixel = outputPixelCache[itemID];
2906 if (density!= 0.0f && density != 1.0)
2908 density = PerceptibleReciprocal(density);
2909 filteredPixel *= (float4)density;
2911 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2912 , ClampToQuantum(filteredPixel.y)
2913 , ClampToQuantum(filteredPixel.z)
2914 , ClampToQuantum(filteredPixel.w));
/* Alpha-weighted output: x, y, z are additionally scaled by 1/gamma. */
2917 float density = densityCache[itemID];
2918 float gamma = gammaCache[itemID];
2919 float4 filteredPixel = outputPixelCache[itemID];
2921 if (density!= 0.0f && density != 1.0) {
2922 density = PerceptibleReciprocal(density);
2923 filteredPixel *= (float4)density;
2926 gamma = PerceptibleReciprocal(gamma);
2929 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2930 , ClampToQuantum(gamma*filteredPixel.y)
2931 , ClampToQuantum(gamma*filteredPixel.z)
2932 , ClampToQuantum(filteredPixel.w));
2934 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
/*
  ResizeVerticalFilter kernel (required work-group size 1x256x1).

  Vertical counterpart of the horizontal resize kernel.  Each work-group
  produces up to pixelPerWorkgroup consecutive output rows of column
  x = get_global_id(0).  The needed span of the input column is gathered into
  the __local buffer inputImageCache with a strided copy (stride
  inputColumns), then the output rows are processed in chunks of
  pixelChunkSize.  Several work-items share one output pixel; their partial
  sums are combined in outputPixelCache, densityCache and gammaCache before
  the final pixels are written to filteredImage.
*/
2945 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2946 void ResizeVerticalFilter(
const __global CLPixelType* inputImage,
const unsigned int inputColumns,
const unsigned int inputRows,
const unsigned int matte
2947 ,
const float yFactor, __global CLPixelType* filteredImage,
const unsigned int filteredColumns,
const unsigned int filteredRows
2948 ,
const int resizeFilterType,
const int resizeWindowType
2949 ,
const __global
float* resizeFilterCubicCoefficients
2950 ,
const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
const float resizeFilterBlur
2951 , __local CLPixelType* inputImageCache,
const int numCachedPixels,
const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize
2952 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
/* Output row range [startY, stopY) handled by this work-group. */
2956 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2957 const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
2958 const unsigned int actualNumPixelToCompute = stopY - startY;
/* scale is at least 1; support is at least 0.5; scale is then inverted. */
2961 float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2962 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2963 scale = PerceptibleReciprocal(scale);
/* Input row range to cache, limited to numCachedPixels and the column. */
2965 const int cacheRangeStartY = MagickMax((
int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(
int)(0));
2966 const int cacheRangeEndY = MagickMin((
int)(cacheRangeStartY + numCachedPixels), (
int)inputRows);
/* Gather the input span of column x into local memory and wait for it. */
2969 const unsigned int x = get_global_id(0);
2970 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
2971 wait_group_events(1,&e);
/* Number of chunks, rounded up. */
2973 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2974 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2977 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2978 const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
2979 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
/* numItems work-items cooperate on each pixel of the chunk. */
2982 const unsigned int itemID = get_local_id(1);
2983 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2985 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2987 float4 filteredPixel = (float4)0.0f;
2988 float density = 0.0f;
/* Work-items without an assigned pixel skip the accumulation. */
2991 if (pixelIndex != -1) {
2994 const int y = chunkStartY + pixelIndex;
/* Input rows [start, stop) that contribute to output row y. */
2997 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2998 const unsigned int start = (
unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2999 const unsigned int stop = (
unsigned int)MagickMin(bisect+support+0.5f,(
float)inputRows);
3000 const unsigned int n = stop - start;
/* Taps per work-item, rounded up. */
3003 unsigned int numStepsPerWorkItem = n / numItems;
3004 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
3006 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3007 if (startStep < n) {
3008 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
/* Index into the local cache of the first tap for this work-item. */
3010 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
/* Plain accumulation: all four channels weighted equally. */
3013 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3014 float4 cp = convert_float4(inputImageCache[cacheIndex]);
3016 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3017 , (ResizeWeightingFunctionType)resizeWindowType
3018 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3020 filteredPixel += ((float4)weight)*cp;
/* Alpha-weighted accumulation: x, y, z use weight * alpha, w uses weight. */
3027 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3028 CLPixelType p = inputImageCache[cacheIndex];
3030 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3031 , (ResizeWeightingFunctionType)resizeWindowType
3032 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3034 float alpha = weight * QuantumScale * GetPixelAlpha(p);
3035 float4 cp = convert_float4(p);
3037 filteredPixel.x += alpha * cp.x;
3038 filteredPixel.y += alpha * cp.y;
3039 filteredPixel.z += alpha * cp.z;
3040 filteredPixel.w += weight * cp.w;
/* Clear the shared accumulators for this chunk. */
3050 if (itemID < actualNumPixelInThisChunk) {
3051 outputPixelCache[itemID] = (float4)0.0f;
3052 densityCache[itemID] = 0.0f;
3054 gammaCache[itemID] = 0.0f;
3056 barrier(CLK_LOCAL_MEM_FENCE);
/* Work-items sharing a pixel add their partial sums one turn at a time. */
3059 for (
unsigned int i = 0; i < numItems; i++) {
3060 if (pixelIndex != -1) {
3061 if (itemID%numItems == i) {
3062 outputPixelCache[pixelIndex]+=filteredPixel;
3063 densityCache[pixelIndex]+=density;
3065 gammaCache[pixelIndex]+=gamma;
3069 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per pixel of the chunk writes the final value. */
3072 if (itemID < actualNumPixelInThisChunk) {
/* Plain output: normalise by density unless it is 0 or 1. */
3074 float density = densityCache[itemID];
3075 float4 filteredPixel = outputPixelCache[itemID];
3076 if (density!= 0.0f && density != 1.0)
3078 density = PerceptibleReciprocal(density);
3079 filteredPixel *= (float4)density;
3081 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
3082 , ClampToQuantum(filteredPixel.y)
3083 , ClampToQuantum(filteredPixel.z)
3084 , ClampToQuantum(filteredPixel.w));
/* Alpha-weighted output: x, y, z are additionally scaled by 1/gamma. */
3087 float density = densityCache[itemID];
3088 float gamma = gammaCache[itemID];
3089 float4 filteredPixel = outputPixelCache[itemID];
3091 if (density!= 0.0f && density != 1.0) {
3092 density = PerceptibleReciprocal(density);
3093 filteredPixel *= (float4)density;
3096 gamma = PerceptibleReciprocal(gamma);
3099 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
3100 , ClampToQuantum(gamma*filteredPixel.y)
3101 , ClampToQuantum(gamma*filteredPixel.z)
3102 , ClampToQuantum(filteredPixel.w));
3104 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
/*
  UnsharpMaskBlurColumn kernel: vertical blur pass followed by the unsharp
  mask step.  Each work-group handles one column (groupX) and a band of rows.
  The band of blurRowData, extended by radius rows on each side, is loaded
  into the __local buffer cachedData and the filter into cachedFilter.  Each
  work-item then convolves its row, compares the difference between the input
  pixel and the blurred pixel against the threshold, and writes either the
  sharpened or the original pixel to filtered_im.
*/
3126 __kernel
void UnsharpMaskBlurColumn(
const __global CLPixelType* inputImage,
3127 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3128 const unsigned int imageColumns,
const unsigned int imageRows,
3129 __local float4* cachedData, __local
float* cachedFilter,
3130 const ChannelType channel,
const __global
float *filter,
const unsigned int width,
3131 const float gain,
const float threshold)
3133 const unsigned int radius = (width-1)/2;
/* Row band of this group, extended by radius above and below. */
3136 const int groupX = get_group_id(0);
3137 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3138 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
/* Band fully inside the image: use a strided group copy. */
3140 if (groupStartY >= 0
3141 && groupStopY < imageRows) {
3142 event_t e = async_work_group_strided_copy(cachedData
3143 ,blurRowData+groupStartY*imageColumns+groupX
3144 ,groupStopY-groupStartY,imageColumns,0);
3145 wait_group_events(1,&e);
/* Otherwise load element by element with row indices clamped to the canvas. */
3148 for (
int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3149 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
3151 barrier(CLK_LOCAL_MEM_FENCE);
/* Copy the filter taps into local memory. */
3154 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3155 wait_group_events(1,&e);
3159 const int cy = get_global_id(1);
3161 if (cy < imageRows) {
3162 float4 blurredPixel = (float4) 0.0f;
/* Unroll factor for the convolution loop; 8 unless already defined. */
3166 \n #ifndef UFACTOR \n
3167 \n #define UFACTOR 8 \n
/* Unrolled part of the convolution, UFACTOR taps per outer iteration. */
3170 for ( ; i+UFACTOR < width; )
3172 \n #pragma unroll UFACTOR \n
3173 for (
int j=0; j < UFACTOR; j++, i++)
3175 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Remaining taps. */
3179 for ( ; i < width; i++)
3181 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Clamp and floor the blurred value before differencing. */
3184 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
3185 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
3187 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3188 float4 outputPixel = inputImagePixel - blurredPixel;
3190 float quantumThreshold = QuantumRange*threshold;
/* Channels whose doubled difference is below the threshold keep the input
   value; the others get input + difference * gain. */
3192 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3193 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
3196 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
3197 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
3206 __kernel
void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
3207 __constant
float *filter,
3208 const unsigned int width,
3209 const unsigned int imageColumns,
const unsigned int imageRows,
3210 __local float4 *pixels,
3211 const float gain,
const float threshold,
const unsigned int justBlur)
3213 const int x = get_global_id(0);
3214 const int y = get_global_id(1);
3216 const unsigned int radius = (width - 1) / 2;
3218 int row = y - radius;
3219 int baseRow = get_group_id(1) * get_local_size(1) - radius;
3220 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
3222 while (row < endRow) {
3223 int srcy = (row < 0) ? -row : row;
3224 srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
3226 float4 value = 0.0f;
3228 int ix = x - radius;
3231 while (i + 7 < width) {
3232 for (
int j = 0; j < 8; ++j) {
3234 srcx = (srcx < 0) ? -srcx : srcx;
3235 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3236 value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
3243 int srcx = (ix < 0) ? -ix : ix;
3244 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3245 value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
3249 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
3250 row += get_local_size(1);
3254 barrier(CLK_LOCAL_MEM_FENCE);
3257 const int px = get_local_id(0);
3258 const int py = get_local_id(1);
3259 const int prp = get_local_size(0);
3260 float4 value = (float4)(0.0f);
3263 while (i + 7 < width) {
3264 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3265 value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
3266 value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
3267 value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
3268 value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
3269 value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
3270 value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
3271 value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
3275 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3278 if ((x < imageColumns) && (y < imageRows)) {
3279 if (justBlur == 0) {
3280 float4 srcPixel = convert_float4(im[x + y * imageColumns]);
3281 float4 diff = srcPixel - value;
3283 float quantumThreshold = QuantumRange*threshold;
3285 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3286 value = select(srcPixel + diff * gain, srcPixel, mask);
3288 filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
/*
  WaveletDenoise: tile-based denoise of srcImage into dstImage.

  The kernel is compiled for a fixed 64x4x1 work-group.  A work-group handles
  one 64x64 tile; a work-item owns column get_local_id(0) of the tile and the
  16 rows get_local_id(1), get_local_id(1) + 4, ... (hence the i / 4 index
  into the 16-entry private arrays).

  Parameters visible in this excerpt:
    srcImage     source pixels, addressed as [x + y * imageWidth].
    dstImage     destination pixels, same addressing.
    threshold    scaled per pass by the noise[] table to give `thresh`.
    imageWidth   image width in pixels.
    imageHeight  image height in pixels.

  NOTE(review): `passes`, `tmp`, `accum` and `pixel` are used below but their
  declarations are not visible in this excerpt, and neither are
  mirrorTop / mirrorBottom (presumably index-reflection helpers - confirm).
  NOTE(review): noise[] has 8 entries and is indexed by `pass`, so more than
  8 passes would read past the table - confirm the caller bounds `passes`.
*/
3294 __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
void WaveletDenoise(__global CLPixelType *srcImage, __global CLPixelType *dstImage,
3295 const float threshold,
3297 const int imageWidth,
3298 const int imageHeight)
/* Border of 2^(passes-1) pixels on each tile side; it is excluded from the final store. */
3300 const int pad = (1 << (passes - 1));
3301 const int tileSize = 64;
3302 const int tileRowPixels = 64;
/* Per-pass multipliers applied to `threshold`.  NOTE(review): literals carry no f suffix. */
3303 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
/* Private copy of the 16 pixels this work-item owns. */
3305 CLPixelType stage[16];
/* Work-group scratch holding one channel of the 64x64 tile. */
3307 local
float buffer[64 * 64];
/*
  Image coordinates of this work-item's column and of tile row 0.  Tiles
  advance by (tileSize - 2 * pad) and start `pad` pixels early, so
  neighbouring tiles overlap by the border width.
*/
3309 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3310 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
/* Load the owned pixels; coordinates go through mirrorBottom then mirrorTop before indexing. */
3312 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3313 stage[i / 4] = srcImage[mirrorTop(mirrorBottom(srcx), imageWidth) + (mirrorTop(mirrorBottom(srcy + i) , imageHeight)) * imageWidth];
/* Process components s0, s1 and s2 one at a time; s3 is not modified in the visible code. */
3317 for (
int channel = 0; channel < 3; ++channel) {
/*
  Copy one component of the staged pixels into the tile buffer.
  NOTE(review): the construct selecting which of the three copies runs for
  the current `channel` is not visible in this excerpt.
*/
3321 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3322 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s0);
3325 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3326 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s1);
3329 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3330 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s2);
/* One iteration per pass; the neighbour distance doubles each pass. */
3341 for (
int pass = 0; pass < passes; ++pass) {
3342 const int radius = 1 << pass;
3343 const int x = get_local_id(0);
3344 const float thresh = threshold * noise[pass];
/*
  Zero all 16 accumulators (accum[6] appears twice in the chain; the repeat
  is redundant).  NOTE(review): any guard controlling when this reset runs is
  not visible in this excerpt.
*/
3347 accum[0] = accum[1] = accum[2] = accum[3] = accum[4] = accum[5] = accum[6] = accum[6] = accum[7] = accum[8] = accum[9] = accum[10] = accum[11] = accum[12] = accum[13] = accum[14] = accum[15] = 0.0f;
/*
  Horizontal step: keep the current value in tmp, then replace it in the
  buffer with 0.5 * centre + 0.25 * (left + right) at distance `radius`.
  The barrier separates the neighbour reads from the in-place write.
*/
3352 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3353 const int offset = i * tileRowPixels;
3355 tmp[i / 4] = buffer[x + offset];
3356 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3357 barrier(CLK_LOCAL_MEM_FENCE);
3358 buffer[x + offset] = pixel;
3360 barrier(CLK_LOCAL_MEM_FENCE);
/*
  Vertical step: same 0.5 / 0.25 / 0.25 weights along the rows, then the
  difference between the saved value and the filtered value is accumulated.
*/
3362 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3363 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3364 float delta = tmp[i / 4] - pixel;
/* delta is compared against -thresh and +thresh; the branch bodies are not visible in this excerpt. */
3366 if (delta < -thresh)
3368 else if (delta > thresh)
3372 accum[i / 4] += delta;
3375 barrier(CLK_LOCAL_MEM_FENCE);
/* Before the last pass, tmp is written back to the buffer as input for the next pass. */
3376 if (pass < passes - 1)
3377 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3378 buffer[x + i * tileRowPixels] = tmp[i / 4];
/*
  tmp is added into the accumulators.  NOTE(review): presumably the
  final-pass alternative of the test above; the joining keyword is not
  visible in this excerpt.
*/
3380 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3381 accum[i / 4] += tmp[i / 4];
3382 barrier(CLK_LOCAL_MEM_FENCE);
/*
  Store the clamped result back into the matching component of the staged
  pixels.  NOTE(review): as with the loads, the per-channel selection is not
  visible in this excerpt.
*/
3387 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3388 stage[i / 4].s0 = ClampToQuantum(accum[i / 4]);
3391 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3392 stage[i / 4].s1 = ClampToQuantum(accum[i / 4]);
3395 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3396 stage[i / 4].s2 = ClampToQuantum(accum[i / 4]);
3400 barrier(CLK_LOCAL_MEM_FENCE);
/* Write only the tile interior (outside the pad border) that also lies inside the image. */
3405 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3407 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3408 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3409 dstImage[srcx + (srcy + i) * imageWidth] = stage[i / 4];
3419#if defined(__cplusplus) || defined(c_plusplus)