Winograd算法的python实现

发布时间:2024-01-18 18:30

算法介绍

F ( m ∗ m , r ∗ r ) F(m*m,r*r) F(mm,rr):
一个 ( m + r − 1 ) ∗ ( m + r − 1 ) (m+r-1)*(m+r-1) (m+r1)(m+r1)的输入特征图和一个 r ∗ r r*r rr的卷积核进行2d卷积得到 m ∗ m m*m mm的输出,若采用直接卷积,则需要 m 2 r 2 m^2r^2 m2r2个乘法,而若采用winograd算法,则乘法数量减少为 ( m + r − 1 ) ∗ ( m + r − 1 ) (m+r-1)*(m+r-1) (m+r1)(m+r1),具体计算过程如下:
在这里插入图片描述
其中,g为 r ∗ r r*r rr的卷积核,d为 n ∗ n n*n nn的输入 t i l e ( n = m + r − 1 ) tile(n=m+r-1) tile(n=m+r1),而G,B,A分别是卷积核变换矩阵、输入特征变换矩阵以及逆变换矩阵,对于 F ( 2 ∗ 2 , 3 ∗ 3 ) F(2*2,3*3) F(22,33) A , G , B A,G,B A,G,B的值分别如下:
Winograd算法的python实现_第1张图片
现有 N ∗ H ∗ W 的 输 入 特 征 图 和 M ∗ N ∗ r ∗ r ( r = 3 ) N*H*W的输入特征图和M*N*r*r(r=3) NHWMNrr(r=3)的卷积核,使用Winograd算法进行计算,则步骤如下
我们选用 F ( 2 ∗ 2 , 3 ∗ 3 ) F(2*2,3*3) F(22,33)进行计算,设 g k , t g_{k,t} gk,t为第k个输出通道第t个输入通道的卷积核,我们对输入特征图进行4x4的分块,滑动步长为m=2,则设 d t , b d_{t,b} dt,b表示第t个输入通道的第b个tile,那么我们有:
U k , t = G g k , t G T U_{k,t}=Gg_{k,t}G^T Uk,t=Ggk,tGT
V t , b = B T d t , b B V_{t,b}=B^Td_{t,b}B Vt,b=BTdt,bB
易知
对每个给定的 k , t , b k,t,b k,t,b U k , t 和 V t , b U_{k,t}和V_{t,b} Uk,tVt,b都是 n ∗ n n*n nn的矩阵,现在我们固定这 n ∗ n n*n nn个元素的横纵坐标,让 k , t , b k,t,b k,t,b变化,则我们有如下 n ∗ n n*n nn对矩阵
U k , t i , j , V t , b i , j , ( i , j = 0 , 1 , . . . , n − 1 ) U^{i,j}_{k,t},V^{i,j}_{t,b},(i,j=0,1,...,n-1) Uk,ti,j,Vt,bi,j,(i,j=0,1,...,n1)
n ∗ n n*n nn个矩阵分别相乘,得到
Q k , b i , j = U k , t i , j ∗ V t , b i , j Q^{i,j}_{k,b}=U^{i,j}_{k,t}*V^{i,j}_{t,b} Qk,bi,j=Uk,ti,jVt,bi,j
再将 Q Q Q进行逆变换,得到
Y k , b = A T Q k , b A Y_{k,b}=A^TQ_{k,b}A Yk,b=ATQk,bA
此时Y的维度为 k ∗ b ∗ m ∗ m k*b*m*m kbmm
经过变换,可以最终得到输出特征图
O ( k , h o , w o ) O(k,h_o,w_o) O(k,ho,wo)

代码


'''
A^T=[1  1  1  0
     0  1 -1  1]
G=[  1     0    0
   0.5   0.5  0.5
   0.5  -0.5  0.5
     0     0    1]
B^T=[1   0  -1  0
     0   1   1  0
     0  -1   1  0
     0  -1   0  1]

Y=A^T[(GgG^T)*(B^TdB)]A
U(k,t)=Gg(k,t)G^T
V(t,b)=B^Td(t,b)B
Q(i,j)=U(i,j)*V(i,j)                  i=0,1,...,n-1,j=0,1,...,n-1,shape=(k,b)
gather i,j to shape Q(k,b,n,n)
Y=A^TQA  shape Y(k,b,m,m)
'''
import numpy as np
#F(2,2,3,3)
def FeatureTransform(x):
    #x(4,4)
    B=np.array([[1,0,0,0],[0,1,-1,-1],[-1,1,1,0],[0,0,0,1]])
    return np.matmul(np.matmul(B.T,x),B)

def FilterTransform(x):
    #x(3,3)
    G=np.array([[1,0,0],[0.5,0.5,0.5],[0.5,-0.5,0.5],[0,0,1]])
    return np.matmul(np.matmul(G,x),G.T)

def ReverseTransform(x):
    A=np.array([[1,0],[1,1],[1,-1],[0,1]])
    return np.matmul(np.matmul(A.T,x),A)

def conv2d(x,w):
    n,_=x.shape
    r,_=w.shape
    m=n-r+1
    out=np.zeros((m,m))
    for i in range(m):
        for j in range(m):
            out[i,j]=np.sum(np.multiply(w,x[i:i+r,j:j+r]))
    return out

def conv(x,f):
    c,h,w=x.shape
    n,c,k,k=f.shape
    h_o=(h-k+1)
    w_o=(w-k+1)
    out=np.zeros((n,h_o,w_o))
    for i in range(h_o):
        for j in range(w_o):
            for nn in range(n):
                out[nn,i,j]=np.sum(np.multiply(f[nn,:,:,:],x[:,i:i+k,j:j+k]))
    return out

def winograd1(x,w):
    U=FilterTransform(w)
    V=FeatureTransform(x)
    Q=np.multiply(U,V)
    return ReverseTransform(Q)

def wingrad2(x,f):
    #F(2,2,3,3)
    n,h,w=x.shape
    m,n,kx,ky=f.shape
    if kx!=3 or ky!=3:
        return None
    h_o,w_o=h-kx+1,w-ky+1
    l=4
    b=int(h_o/2*w_o/2)                           #tile个数
    U=np.zeros((m,n,l,l))
    V=np.zeros((n,b,l,l))
    Q=np.zeros((m,b,l,l))
    Y=np.zeros((m,b,2,2))
    #transform filter
    for mm in range(m):
        for nn in range(n):
            U[mm,nn,:,:]=FilterTransform(f[mm,nn,:,:])
    #transform feature
    for nn in range(n):
        for bb in range(b):
            #计算第b个tile的起始地址,tile stride=m=2
            r=int(bb)//(int(w_o/2))
            c=int(bb)%(int(w_o/2))
            tile=x[nn,r*2:r*2+l,c*2:c*2+l]
            V[nn,bb,:,:]=FeatureTransform(tile)
    #MM U(m,n,l,l)*V(n,b,l,l)
    for i in range(l):
        for j in range(l):
            Q[:,:,i,j]=np.matmul(U[:,:,i,j],V[:,:,i,j])
    #reverse transform    Q(m,b,l,l)->Y(m,b,2,2)
    for mm in range(m):
        for bb in range(b):
            Y[mm,bb,:,:]=ReverseTransform(Q[mm,bb,:,:])
    #restore to O[m,h_o,w_o]
    O=np.zeros((m,h_o,w_o))
    for mm in range(m):
        for bb in range(b):
            for i in range(2):
                for j in range(2):
                    r=bb//int(w_o/2)
                    c=bb%int(w_o/2)
                    O[mm,2*r:2*r+2,2*c:2*c+2]=Y[mm,bb,:,:]
    return O

d=np.random.randint(low=0,high=30, size=(4,4))
g=np.random.randint(low=0,high=20,size=(3,3))
print(winograd1(d,g))
print(conv2d(d,g))


ch_in=8
ch_out=10
height=8
width=6
X=np.random.randint(low=0,high=100,size=(ch_in,height,width))
W=np.random.randint(low=0,high=100,size=(ch_out,ch_in,3,3))                       #6-3+1=4,共2x2个tile
print(np.max(np.abs(conv(X,W)-wingrad2(X,W))))
#print(wingrad2(X,W))

测试,运行结果如下
Winograd算法的python实现_第2张图片
0.0说明功能无误!

更新

F(4x4,3x3)的实现

变换矩阵如图所示
Winograd算法的python实现_第3张图片
相应的python代码(已解决分块不整除的问题)


'''
Y=A^T[(GgG^T)*(B^TdB)]A
U(k,t)=Gg(k,t)G^T
V(t,b)=B^Td(t,b)B
Q(i,j)=U(i,j)*V(i,j)                  i=0,1,...,n-1,j=0,1,...,n-1,shape=(k,b)
gather i,j to shape Q(k,b,n,n)
Y=A^TQA  shape Y(k,b,m,m)
'''
import math
import numpy as np
#F(4,4,3,3)
#input tile shape=(6,6)
def FeatureTransform(x):
    #x(6,6)
    B=np.array([[ 4, 0, 0, 0, 0, 0],
                [ 0,-4, 4,-2, 2, 4],
                [-5,-4,-4,-1,-1, 0],
                [ 0, 1,-1, 2,-2,-5],
                [ 1, 1, 1, 1, 1, 0],
                [ 0, 0, 0, 0, 0, 1]])
    return np.matmul(np.matmul(B.T,x),B)

def FilterTransform(x):
    #x(3,3)
    G=np.array([[ 1/4,   0,   0],
                [-1/6,-1/6,-1/6],
                [-1/6, 1/6,-1/6],
                [1/24,1/12, 1/6],
                [1/24,-1/12,1/6],
                [   0,    0,  1]])
    return np.matmul(np.matmul(G,x),G.T)

def ReverseTransform(x):
    A=np.array([[1, 0,0, 0],
                [1, 1,1, 1],
                [1,-1,1,-1],
                [1, 2,4, 8],
                [1,-2,4,-8],
                [0, 0,0, 1]])
    return np.matmul(np.matmul(A.T,x),A)

def conv2d(x,w):
    n,_=x.shape
    r,_=w.shape
    m=n-r+1
    out=np.zeros((m,m))
    for i in range(m):
        for j in range(m):
            out[i,j]=np.sum(np.multiply(w,x[i:i+r,j:j+r]))
    return out

def conv(x,f):
    c,h,w=x.shape
    n,c,k,k=f.shape
    h_o=(h-k+1)
    w_o=(w-k+1)
    out=np.zeros((n,h_o,w_o))
    for i in range(h_o):
        for j in range(w_o):
            for nn in range(n):
                out[nn,i,j]=np.sum(np.multiply(f[nn,:,:,:],x[:,i:i+k,j:j+k]))
    return out

def winograd1(x,w):
    U=FilterTransform(w)
    V=FeatureTransform(x)
    Q=np.multiply(U,V)
    return ReverseTransform(Q)

def winograd2(x,f):
    #F(4,4,3,3)
    n,h,w=x.shape
    m,n,kx,ky=f.shape
    if kx!=3 or ky!=3:
        return None
    h_o,w_o=h-kx+1,w-ky+1
    l=6               #m+r-1=6
    b=int(math.ceil(h_o/4)*math.ceil(w_o/4))                           #tile个数
    U=np.zeros((m,n,l,l))
    V=np.zeros((n,b,l,l))
    Q=np.zeros((m,b,l,l))
    Y=np.zeros((m,b,4,4))
    #transform filter
    for mm in range(m):
        for nn in range(n):
            U[mm,nn,:,:]=FilterTransform(f[mm,nn,:,:])
    #transform feature
    for nn in range(n):
        for bb in range(b):
            #计算第b个tile的起始地址,tile stride=m=4
            r=int(bb)//(int(math.ceil(w_o/4)))
            c=int(bb)%(int(math.ceil(w_o/4)))
            if r*4+l<h:
                rmax=r*4+l
            else:
                rmax=h
            if c*4+l<w:
                cmax=c*4+l
            else:
                cmax=w
            tile=np.zeros((l,l))
            tile[0:rmax-4*r,0:cmax-4*c]=x[nn,r*4:rmax,c*4:cmax]
            V[nn,bb,:,:]=FeatureTransform(tile)
    #MM U(m,n,l,l)*V(n,b,l,l)
    for i in range(l):
        for j in range(l):
            Q[:,:,i,j]=np.matmul(U[:,:,i,j],V[:,:,i,j])
    #reverse transform    Q(m,b,l,l)->Y(m,b,4,4)
    for mm in range(m):
        for bb in range(b):
            Y[mm,bb,:,:]=ReverseTransform(Q[mm,bb,:,:])
    #restore to O[m,h_o,w_o]
    O=np.zeros((m,h_o,w_o))
    for mm in range(m):
        for bb in range(b):
            r=int(bb)//int(math.ceil(w_o/4))
            c=int(bb)%int(math.ceil(w_o/4))
            if 4*r+4>=h_o and 4*c+4>=w_o:
                O[mm,4*r:h_o,4*c:w_o]=Y[mm,bb,0:h_o-4*r,0:w_o-4*c]
            elif 4*r+4>=h_o and 4*c+4<w_o:
                O[mm,4*r:h_o,4*c:4*c+4]=Y[mm,bb,0:h_o-4*r,:]
            elif 4*r+4<h_o and 4*c+4>=w_o:
                O[mm,4*r:4*r+4,4*c:w_o]=Y[mm,bb,:,0:w_o-4*c]
            else:
                O[mm,4*r:4*r+4,4*c:4*c+4]=Y[mm,bb,:,:]
    return O

d=np.random.randint(low=0,high=30, size=(6,6))
g=np.random.randint(low=0,high=20, size=(3,3))
print(np.max(np.abs(winograd1(d,g)-conv2d(d,g))))
#print(conv2d(d,g))

ch_in=33
ch_out=27
height=111
width=137
X=np.random.randint(low=0,high=100,size=(ch_in,height,width))
W=np.random.randint(low=0,high=100,size=(ch_out,ch_in,3,3))
print(np.max(np.abs(winograd2(X,W)-conv(X,W))))

误差允许范围内功能正确:
Winograd算法的python实现_第4张图片

ItVuer - 免责声明 - 关于我们 - 联系我们

本网站信息来源于互联网,如有侵权请联系:561261067@qq.com

桂ICP备16001015号