C++ HLSL实现简单的图像处理功能
由于DXVA2解码得到的数据不宜拷贝回内存交给CPU处理,所以最好的办法是在GPU上直接进行处理。D3D的像素着色器能够对像素直接进行操作,实现点运算极其简单方便,简单的卷积运算效果也非常好。但D3D9的限制也很多,对于过于复杂的图像处理则显得有些不能胜任。
1.点运算
点运算用HLSL非常容易实现,几乎是公式怎么写,代码就怎么写。以RGB转灰度图显示为例:
// Source texture sampled by the point-operation pixel shader.
texture Tex0;

// Selects the point operation applied by Main():
// 0 = linear, 1 = logarithmic, 2 = power (gamma).
int iFlag = 0;
// Gain / coefficient of the selected point operation.
float aValue = 0.0;
// Offset (linear mode, divided by 255 in Main) or exponent (power mode).
float bValue = 0.0;

// Sampler bound to Tex0: linear filtering, coordinates clamped at the edges.
sampler2D YTex =
sampler_state
{
    Texture = <Tex0>;
    MipFilter = LINEAR;
    MinFilter = LINEAR;
    MagFilter = LINEAR;
    AddressU = CLAMP;
    AddressV = CLAMP;
};
// Input of the point-operation pixel shader: only the texture coordinate.
struct PS_INPUT
{
    float2 uvCoords0 : TEXCOORD0;
};
// Pixel shader: converts the sampled RGB colour to a grey level and applies
// the point operation selected by iFlag:
//   iFlag == 0 : linear     s = aValue * gray + bValue / 255
//   iFlag == 1 : logarithm  s = aValue * log(1 + gray)
//   iFlag == 2 : power      s = aValue * pow(|gray|, bValue)
//   otherwise  : s stays 0
// Returns (s, s, s, 1).
float4 Main(PS_INPUT input) : COLOR0
{
    // Fetch the texel once instead of once per channel.
    float3 rgb = tex2D(YTex, input.uvCoords0).rgb;

    // RGB -> grey with weights 0.299 / 0.587 / 0.114.
    // Author's note: not sure this is the right way to display grey; assumed so.
    float gray = dot(rgb, float3(0.299, 0.587, 0.114));

    float s = 0;
    if (iFlag == 0)
    {
        s = aValue * gray + bValue / 255;
    }
    else if (iFlag == 1)
    {
        s = aValue * log(1 + gray);
    }
    else if (iFlag == 2)
    {
        // abs() keeps the base of pow() non-negative.
        s = aValue * pow(abs(gray), bValue);
    }

    return float4(s, s, s, 1.0);
}
点运算之所以如此简单,是因为GPU是并行运算的:我个人认为可以看成每一个像素点(BGRA)对应一个线程,这大概就是OpenCL中所谓的数据并行。这是一个非常简单的程序,指令数少,程序结构也很简单,shader的版本用2.0就可以轻松编译通过。
2.卷积运算举例
指令数较多的情况下,2.0版本的shader就搞不定了,上3.0版本可以做一些简单的卷积运算。以均值滤波为例(下面的代码对ksize×ksize邻域求平均,严格来说是均值滤波而不是中值滤波):
// Source texture sampled by the filter pixel shader.
texture Tex0;

// Transform matrices combined in MainVS_Screen as World * View * Proj.
matrix WorldMatrix;
matrix ViewMatrix;
matrix ProjMatrix;

// Sampler bound to Tex0: linear filtering, coordinates clamped at the edges.
sampler2D YTex =
sampler_state
{
    Texture = <Tex0>;
    MipFilter = LINEAR;
    MinFilter = LINEAR;
    MagFilter = LINEAR;
    AddressU = CLAMP;
    AddressV = CLAMP;
};
// Per-vertex input: position, colour and texture coordinate.
struct VS_INPUT
{
    float4 pos : POSITION;
    float4 color : COLOR0;
    float2 tex : TEXCOORD0;
};
// Vertex shader output: transformed position plus the colour and texture
// coordinate copied through from the input vertex.
struct VS_OUTPUT
{
    float4 pos : POSITION;
    float4 color : COLOR0;
    float2 tex : TEXCOORD0;
};
// Texture size; MainPS_Screen uses 1 / size as the one-texel step in u and v.
float2 g_v4ScreenSize;

// Kernel edge length. MainPS_Screen filters only for odd values 3..11;
// the default of 1 leaves the image untouched.
int ksize = 1;

// Texture-coordinate rectangle in which the filter is applied
// (fLeft < u < fRight, fTop < v < fBottom). With the -1 defaults no
// coordinate satisfies u < fRight, so nothing is filtered until they are set.
float fLeft = -1.0f;
float fTop = -1.0f;
float fRight = -1.0f;
float fBottom = -1.0f;
//--------------------------------- BurTechnique --------------------------------------
// Vertex shader: transforms the vertex position by World * View * Proj and
// passes the texture coordinate and colour through unchanged.
VS_OUTPUT MainVS_Screen(VS_INPUT In)
{
    VS_OUTPUT Out = (VS_OUTPUT)0;

    float4x4 matWorldView = mul(WorldMatrix, ViewMatrix);
    float4x4 matWorldViewProj = mul(matWorldView, ProjMatrix);

    Out.pos = mul(In.pos, matWorldViewProj);
    Out.tex = In.tex;
    Out.color = In.color;
    return Out;
}
// Pixel shader: mean (box) filter. Averages the RGB values of the
// ksize x ksize neighbourhood centred on the current texel and returns the
// result with alpha = 1.
//
// The unfiltered texel is returned unchanged when
//   - ksize is not one of the odd sizes 3, 5, 7, 9, 11, or
//   - the texture coordinate lies outside the open rectangle
//     (fLeft, fRight) x (fTop, fBottom).
//
// The taps are written out one by one on purpose: each `if (ksize >= K)`
// block adds only the taps that are new when the window grows to K x K
// (columns K-2 and K-1, then rows K-2 and K-1 of the earlier columns).
float4 MainPS_Screen(VS_INPUT In) : COLOR0
{
    float4 outColor = tex2D(YTex, In.tex).rgba;

    // Unsupported kernel sizes pass through.
    if (ksize < 3 || ksize > 11 || ksize % 2 == 0)
    {
        return outColor;
    }
    // Pixels outside the filter rectangle pass through.
    if (!(In.tex.x < fRight && In.tex.y < fBottom && In.tex.x > fLeft && In.tex.y > fTop))
    {
        return outColor;
    }

    // Texture size -> size of one texel in texture coordinates.
    float2 TexSize = float2(g_v4ScreenSize.x, g_v4ScreenSize.y);
    float x_off = 1.0f / TexSize.x;
    float y_off = 1.0f / TexSize.y;

    // Coordinate of tap (0, 0). Taps run from 0 to ksize - 1, so the window is
    // centred on In.tex only if it starts (ksize - 1) / 2 texels before it.
    // Starting ksize / 2 texels before (1.5 for ksize = 3) would shift the
    // whole window by half a texel.
    float halfSpan = (ksize - 1) * 0.5f;
    float2 fX0Y0 = In.tex - float2(x_off * halfSpan, y_off * halfSpan);

    float3 sum = {0.0f, 0.0f, 0.0f};
    if (ksize >= 3)
    {
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 2)).rgb;
    }
    if (ksize >= 5)
    {
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 4)).rgb;
    }
    if (ksize >= 7)
    {
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 6)).rgb;
    }
    if (ksize >= 9)
    {
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 8)).rgb;
    }
    if (ksize >= 11)
    {
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 9, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 0)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 1)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 2)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 3)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 4)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 5)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 6)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 7)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 8)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 10, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 9)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 0, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 1, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 2, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 3, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 4, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 5, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 6, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 7, y_off * 10)).rgb;
        sum += tex2D(YTex, fX0Y0 + float2(x_off * 8, y_off * 10)).rgb;
    }

    // Mean over the ksize * ksize taps; alpha is forced to 1.
    outColor = float4(sum / (ksize * ksize), 1.0f);
    return outColor;
}
//--------------------------- Technique ---------------------------
// Single-pass technique: fixed-function light 0 disabled, both shaders
// compiled for shader model 3.0.
technique BurTechnique
{
    pass P0
    {
        LightEnable[0] = false;
        VertexShader = compile vs_3_0 MainVS_Screen();
        PixelShader = compile ps_3_0 MainPS_Screen();
    }
}
由于3.0版本的shader似乎不允许pixel shader单独出现,所以我把点运算中单独使用像素着色器的做法改为用特效(effect)来实现。HLSL语法中有if语句,也有for语句,可是这个程序却不厌其烦地把所有的采样都逐一列了出来,而没有使用for循环。这是因为在实际使用中发现有一些限制,比如if语句的if(A>B),A与B中必须有一个是常量,就像上面见到的那种形式;for循环中间的判断也是如此,只是在第二层j循环中可以是第一层循环的i,即不可以:
// Illustration only: per the surrounding text, this form was rejected by the
// author's compiler unless ksize and ksize1 are constants (ksize1 may also be
// the outer index i).
for (int i = 0; i < ksize; i++)
{
    for (int j = 0; j < ksize1; j++)
    {
        // ...
    }
}
以上代码的ksize与ksize1都必须为常数,例外的情况是ksize1可以为第一层循环的i。这个问题不知道后续版本的shader有没有,反正我当前使用的版本有。
另外有一个需要注意的地方是指令数:2.0版本的shader支持的指令数相当少,3.0版本则要多很多,我最长写到了400多条、接近500条时才导致编译失败。还有一个需要提醒的是,3.0版本的shader只有D3D 9.0c及以后的版本才支持。如果要求做更为复杂的图像处理,可以的话建议上D3D11;compute shader虽然我没用过,但从介绍来看,应该可以胜任一些更为复杂的图像处理。
以上就是本文的全部内容,希望本文的内容对大家的学习或者工作能带来一定的帮助,同时也希望多多支持毛票票!