对python中Librosa的mfcc步骤详解
1.对语音数据归一化
如16000Hz采样的16位PCM数据,会将每个采样点除以32768,使取值落在[-1,1)范围内
2.计算窗函数:(*注意librosa中不进行预处理)
3.进行数据扩展填充:librosa采用的是镜像填充("reflect")
如原数据为 1 2 3 4 5,填充长度为4时,左右各填充4个点,得到 5432-12345-4321,即 5432123454321(共 5+2×4=13 个点)
4.分帧
5.加窗:对每一帧进行加窗,
6.进行fft傅里叶变换
librosa中的fft计算,在.NET中可以用System.Numerics的Complex类型,配合
MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame,FourierOptions.Matlab)来完成,计算结果相同
7.mel计算(每一帧取20个特征点)
ImportsSystem.Numerics
ImportsMathNet.Numerics
ImportsMathNet.Numerics.IntegralTransforms
Modulemfcc_module
' Empty placeholder class; it declares no members.
Public Class Librosa
End Class

' Pi as a Double; read by Dct and General_cosine in this module.
Private pi As Double = 3.1415926535897931
' Converts a complex spectrum into a power spectrum.
'
' fft_data : complex values indexed as (row, column).
' Returns  : an array of the same shape holding the squared magnitude
'            (Re^2 + Im^2) of every input element.
Public Function spectrum(fft_data(,) As Complex) As Double(,)
    Dim rowCount As Integer = fft_data.GetLength(0)
    Dim colCount As Integer = fft_data.GetLength(1)
    Dim power(rowCount - 1, colCount - 1) As Double

    For row As Integer = 0 To rowCount - 1
        For col As Integer = 0 To colCount - 1
            Dim value As Complex = fft_data(row, col)
            ' Squared magnitude written out explicitly.
            power(row, col) = (value.Real * value.Real) + (value.Imaginary * value.Imaginary)
        Next
    Next

    Return power
End Function
' Runs a forward FFT on every row of `data` and keeps the non-negative
' frequency bins of each transform.
'
' data    : real-valued frames indexed as (frame, sample).
' Returns : complex array indexed as (frame, bin) with frameLength \ 2 + 1
'           bins per frame (1025 bins for the 2048-sample frames used by the
'           MFCC pipeline). The bin count used to be hard-coded to 1025,
'           which failed for shorter frames and truncated longer ones.
Public Function FFT(data As Double(,)) As Complex(,)
    Dim frameCount As Integer = data.GetLength(0)
    Dim frameLength As Integer = data.GetLength(1)

    ' An empty frame has no bins at all.
    Dim binCount As Integer = 0
    If frameLength > 0 Then
        binCount = frameLength \ 2 + 1
    End If

    Dim result(frameCount - 1, binCount - 1) As Complex
    If binCount = 0 Then
        Return result
    End If

    ' Scratch buffer reused for every frame; Fourier.Forward transforms it in place.
    Dim FFT_frame As Complex() = New Complex(frameLength - 1) {}

    For n As Integer = 0 To frameCount - 1
        For i As Integer = 0 To frameLength - 1
            FFT_frame(i) = data(n, i)
        Next

        MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab)

        For k As Integer = 0 To binCount - 1
            result(n, k) = FFT_frame(k)
        Next
    Next n

    Return result
End Function
' Matrix product dct_ x power_to_db_.
'
' dct_         : (coefficients, bands) matrix, e.g. 20 x 128.
' power_to_db_ : (bands, frames) matrix, e.g. 128 x 5. It is indexed as
'                (band, frame), so its first dimension must cover
'                dct_.GetLength(1).
' Returns      : (coefficients, frames) matrix, e.g. 20 x 5.
Public Function _mfcc(dct_ As Double(,), power_to_db_ As Double(,)) As Double(,)
    Dim coeffCount As Integer = dct_.GetLength(0)
    Dim bandCount As Integer = dct_.GetLength(1)
    Dim frameCount As Integer = power_to_db_.GetLength(1)
    Dim product(coeffCount - 1, frameCount - 1) As Double

    For coeff As Integer = 0 To coeffCount - 1
        For frame As Integer = 0 To frameCount - 1
            Dim total As Double = 0
            For band As Integer = 0 To bandCount - 1
                total = total + dct_(coeff, band) * power_to_db_(band, frame)
            Next
            product(coeff, frame) = total
        Next
    Next

    Return product
End Function
' Builds an orthonormal DCT-II basis.
'
' n_filters : number of basis rows (output coefficients).
' n_input   : number of columns (input length).
' Returns   : (n_filters, n_input) matrix where row 0 is 1/sqrt(n_input) and
'             row r > 0 is cos(r * (2c + 1) * pi / (2 * n_input)) * sqrt(2 / n_input).
'             An empty matrix is returned when either size is zero.
' Throws    : ArgumentOutOfRangeException when a size is negative.
Public Function Dct(n_filters As Integer, n_input As Integer) As Double(,)
    If n_filters < 0 Then
        Throw New ArgumentOutOfRangeException("n_filters")
    End If
    If n_input < 0 Then
        Throw New ArgumentOutOfRangeException("n_input")
    End If

    Dim basis(n_filters - 1, n_input - 1) As Double
    If n_filters = 0 OrElse n_input = 0 Then
        Return basis
    End If

    ' Sample angles at the odd multiples of pi / (2 * n_input).
    Dim samples(n_input - 1) As Double
    For i As Integer = 0 To n_input - 1
        samples(i) = (2 * i + 1) * Math.PI / (2 * n_input)
    Next

    Dim firstRow As Double = 1 / Math.Sqrt(n_input)
    For i As Integer = 0 To n_input - 1
        basis(0, i) = firstRow
    Next

    ' "/" is floating-point division in VB, so 2 / n_input is not truncated.
    Dim scale As Double = Math.Sqrt(2 / n_input)
    For row As Integer = 1 To n_filters - 1
        For i As Integer = 0 To n_input - 1
            basis(row, i) = Math.Cos(row * samples(i)) * scale
        Next
    Next

    Return basis
End Function
' Converts a power spectrogram to decibels:
'   10 * log10(max(admin, S)) - 10 * log10(max(admin, ref))
'
' S      : power values, any 2-D shape.
' ref    : reference power that maps to 0 dB.
' admin  : lower bound applied to S and ref before the logarithm
'          (default 1e-10 = 0.0000000001).
' top_db : when positive, every value is raised to at least
'          (maximum of the result - top_db). Zero or negative disables
'          the clipping. This parameter used to be ignored.
' Returns: array of the same shape as S, in dB.
Public Function power_to_db(S As Double(,), Optional ref As Double = 1, Optional admin As Double = 0.0000000001, Optional top_db As Double = 80) As Double(,)
    Dim rowCount As Integer = S.GetLength(0)
    Dim colCount As Integer = S.GetLength(1)
    Dim result(rowCount - 1, colCount - 1) As Double

    ' The reference term does not depend on the element, so compute it once.
    Dim refDb As Double = 10 * Math.Log10(Math.Max(admin, ref))
    Dim peak As Double = Double.NegativeInfinity

    For n As Integer = 0 To rowCount - 1
        For i As Integer = 0 To colCount - 1
            Dim db As Double = 10 * Math.Log10(Math.Max(admin, S(n, i))) - refDb
            result(n, i) = db
            If db > peak Then
                peak = db
            End If
        Next
    Next

    If top_db > 0 AndAlso Not Double.IsNegativeInfinity(peak) Then
        ' Limit the dynamic range to top_db below the loudest value.
        Dim floorDb As Double = peak - top_db
        For n As Integer = 0 To rowCount - 1
            For i As Integer = 0 To colCount - 1
                If result(n, i) < floorDb Then
                    result(n, i) = floorDb
                End If
            Next
        Next
    End If

    Return result
End Function
' Projects a power spectrum onto a filter bank.
'
' mel_basis : filter weights indexed as (filter, bin), e.g. 128+2 x 1025.
'             The last two rows are skipped: only rows
'             0 .. GetLength(0) - 3 are used.
' s         : power spectrum indexed as (frame, bin), e.g. 5 x 1025.
' Returns   : (mel_basis rows - 2, frames) matrix where element (f, t) is
'             the sum over bins of mel_basis(f, bin) * s(t, bin).
Public Function melspectrogram(mel_basis(,) As Double, s(,) As Double) As Double(,)
    Dim lastFilter As Integer = mel_basis.GetLength(0) - 3
    Dim lastFrame As Integer = s.GetLength(0) - 1
    Dim lastBin As Integer = mel_basis.GetLength(1) - 1
    Dim result(lastFilter, lastFrame) As Double

    For filterIndex As Integer = 0 To lastFilter
        For frameIndex As Integer = 0 To lastFrame
            Dim energy As Double = 0
            For bin As Integer = 0 To lastBin
                energy = energy + mel_basis(filterIndex, bin) * s(frameIndex, bin)
            Next
            result(filterIndex, frameIndex) = energy
        Next
    Next

    Return result
End Function
' Scales filter rows in place by 2 / (mel_f(row + 2) - mel_f(row)).
'
' mel_f   : band-edge frequencies.
' weights : filter weights indexed as (row, column); modified in place.
'           Rows 0 .. GetLength(0) - 2 are scaled. The scale array has
'           mel_f.Length - 1 entries, and its last entry is left at 0.
' Returns : the same `weights` array instance.
Public Function normal(mel_f As Double(), weights(,) As Double) As Double(,)
    Dim scale(mel_f.Length - 2) As Double
    For band As Integer = 0 To mel_f.Length - 3
        scale(band) = 2 / (mel_f(band + 2) - mel_f(band))
    Next

    Dim lastRow As Integer = weights.GetLength(0) - 2
    Dim lastCol As Integer = weights.GetLength(1) - 1
    For row As Integer = 0 To lastRow
        For col As Integer = 0 To lastCol
            weights(row, col) = weights(row, col) * scale(row)
        Next
    Next

    Return weights
End Function
' Builds triangular filter weights from frequency differences.
'
' a     : differences indexed as (band edge, bin), as produced by ramps.
' fdiff : spacing between consecutive band edges.
' Returns: array with the same shape as `a`. For rows 0 .. GetLength(0) - 3,
'          element (n, i) = max(0, min(-a(n, i) / fdiff(n),
'                                      a(n + 2, i) / fdiff(n + 1))).
'          The last two rows are left at zero.
Public Function weight(a As Double(,), fdiff As Double()) As Double(,)
    Dim rowCount As Integer = a.GetLength(0)
    Dim colCount As Integer = a.GetLength(1)
    Dim filters(rowCount - 1, colCount - 1) As Double

    For band As Integer = 0 To rowCount - 3
        For bin As Integer = 0 To colCount - 1
            Dim rising As Double = -(a(band, bin) / fdiff(band))
            Dim falling As Double = a(band + 2, bin) / fdiff(band + 1)
            Dim smaller As Double = Math.Min(rising, falling)
            filters(band, bin) = Math.Max(0, smaller)
        Next
    Next

    Return filters
End Function
' Outer difference of two vectors.
'
' Returns an (A.Length, B.Length) matrix whose element (r, c) is A(r) - B(c).
Public Function ramps(A As Double(), B As Double()) As Double(,)
    Dim rowCount As Integer = A.Length
    Dim colCount As Integer = B.Length
    Dim table(rowCount - 1, colCount - 1) As Double

    For r As Integer = 0 To rowCount - 1
        Dim left As Double = A(r)
        For c As Integer = 0 To colCount - 1
            table(r, c) = left - B(c)
        Next
    Next

    Return table
End Function
' First-order difference of a vector.
'
' Returns an array of arr.Length - 1 elements where element i is
' arr(i + 1) - arr(i).
Public Function diff(arr As Double()) As Double()
    Dim deltas(arr.Length - 2) As Double

    For i As Integer = 0 To arr.Length - 2
        deltas(i) = arr(i + 1) - arr(i)
    Next

    Return deltas
End Function
' Framing, variant 2: slices a signal into overlapping frames stored as
' the rows of a 2-D array.
'
' y      : input samples.
' n_ftt  : samples per frame.
' hop    : distance in samples between the starts of consecutive frames.
' Returns: (frames, n_ftt) array with floor((y.Length - n_ftt) / hop) + 1
'          rows; row f holds y(f * hop) .. y(f * hop + n_ftt - 1).
Public Function Frame2(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double(,)
    Dim frameCount As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1
    Dim frames(frameCount - 1, n_ftt - 1) As Double

    For frameIndex As Integer = 0 To frameCount - 1
        Dim start As Integer = frameIndex * hop
        For offset As Integer = 0 To n_ftt - 1
            frames(frameIndex, offset) = y(start + offset)
        Next
    Next

    Return frames
End Function
' Framing, flat variant: slices a signal into overlapping frames stored
' back to back in a 1-D array.
'
' y      : input samples.
' n_ftt  : samples per frame.
' hop    : distance in samples between the starts of consecutive frames.
' Returns: array of exactly frames * n_ftt elements, where
'          frames = floor((y.Length - n_ftt) / hop) + 1 and frame f occupies
'          indices f * n_ftt .. (f + 1) * n_ftt - 1. An empty array is
'          returned when the signal is too short for a single frame.
'
' The buffer used to be declared with upper bound frames * n_ftt, which made
' it one element too long, and every sample was written to the debug output.
Public Function Frame(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double()
    Dim tim As Integer = CInt(Math.Floor((y.Length - n_ftt) / hop)) + 1
    If tim <= 0 Then
        Return New Double() {}
    End If

    ' VB array declarations take the upper bound, hence the "- 1".
    Dim new_buff(tim * n_ftt - 1) As Double

    For i As Integer = 0 To tim - 1
        Array.Copy(y, i * hop, new_buff, i * n_ftt, n_ftt)
    Next

    Return new_buff
End Function
' Computes the band-edge frequencies (in Hz) of the mel filter bank.
'
' filterNum + 2 points (130 for 128 filters) are spaced evenly on the mel
' axis between 0 Hz and sampleRate / 2, then converted back to Hz with
' mel_to_hz2.
'
' Returns: array of filterNum + 2 frequencies in Hz.
'
' The unused filter_points / h / frameSize locals and the per-point
' mel_to_hz calls whose results were discarded have been removed; the
' returned values are unchanged.
Public Function MelFilter() As Double()
    Const sampleRate As Integer = 16000   ' sampling rate in Hz
    Const filterNum As Integer = 128      ' number of mel filters

    Dim freMax As Double = sampleRate / 2 ' highest frequency covered
    Dim freMin As Double = 0              ' lowest frequency covered

    Dim melFremax As Double = hz_to_mel(freMax)
    ' Evaluates to 0 for freMin = 0.
    Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)

    ' Mel distance between consecutive band edges.
    Dim k As Double = (melFremax - melFremin) / (filterNum + 1)

    Dim m As Double() = New Double(filterNum + 1) {}
    For i As Integer = 0 To filterNum + 1
        m(i) = melFremin + k * i
    Next

    Return mel_to_hz2(m)
End Function
' Converts a frequency in Hz to the mel scale.
'
' frequencies : frequency in Hz.
' htk         : True  -> 1125 * ln(1 + f / 700);
'               False -> piecewise scale: linear (f / (200/3)) below
'                        1000 Hz, logarithmic at and above 1000 Hz.
' Returns     : the mel value.
'
' The piecewise branch used to apply the logarithmic formula to every input,
' which gave wrong values below 1000 Hz and -Infinity at 0 Hz.
Public Function hz_to_mel(frequencies As Double, Optional htk As Boolean = False) As Double
    If htk Then
        Return 1125 * Math.Log(1 + frequencies / 700)
    End If

    Dim f_min As Double = 0.0
    Dim f_sp As Double = 200.0 / 3
    Dim min_log_hz As Double = 1000.0                      ' start of the log region (Hz)
    Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same point in mels
    Dim logstep As Double = Math.Log(6.4) / 27.0           ' step size in the log region

    If frequencies >= min_log_hz Then
        Return min_log_mel + Math.Log(frequencies / min_log_hz) / logstep
    End If

    ' Linear region.
    Return (frequencies - f_min) / f_sp
End Function
' Converts an array of mel values to frequencies in Hz.
'
' mel    : mel values.
' htk    : True  -> 700 * (exp(mel / 1125) - 1), the inverse of the htk
'                   branch of hz_to_mel;
'          False -> piecewise scale: linear (mel * 200/3) up to the
'                   1000 Hz break point, exponential above it.
' Returns: array of the same length, in Hz.
'
' The htk flag used to be ignored.
Public Function mel_to_hz2(mel() As Double, Optional htk As Boolean = False) As Double()
    Dim freqs(mel.Length - 1) As Double

    If htk Then
        For i As Integer = 0 To mel.Length - 1
            freqs(i) = 700 * (Math.Exp(mel(i) / 1125) - 1)
        Next
        Return freqs
    End If

    Dim f_min As Double = 0.0
    Dim f_sp As Double = 200.0 / 3
    Dim min_log_hz As Double = 1000.0                      ' start of the log region (Hz)
    Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same point in mels
    Dim logstep As Double = Math.Log(6.4) / 27.0           ' step size in the log region

    For i As Integer = 0 To mel.Length - 1
        If mel(i) > min_log_mel Then
            freqs(i) = min_log_hz * Math.Exp(logstep * (mel(i) - min_log_mel))
        Else
            freqs(i) = f_min + f_sp * mel(i)
        End If
    Next

    Return freqs
End Function
' Converts a single mel value to a frequency in Hz.
'
' mel    : mel value.
' htk    : True  -> 700 * (exp(mel / 1125) - 1), the inverse of the htk
'                   branch of hz_to_mel;
'          False -> piecewise scale: linear (mel * 200/3) up to the
'                   1000 Hz break point, exponential above it.
' Returns: frequency in Hz.
'
' Two defects are fixed: the htk expression was written as
' 700 * (exp(mel) / 1125) - 1, and the linear value of the piecewise scale
' was computed but never used.
Public Function mel_to_hz(mel As Double, Optional htk As Boolean = False) As Double
    If htk Then
        Return 700 * (Math.Exp(mel / 1125) - 1)
    End If

    Dim f_min As Double = 0.0
    Dim f_sp As Double = 200.0 / 3
    Dim min_log_hz As Double = 1000.0                      ' start of the log region (Hz)
    Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same point in mels
    Dim logstep As Double = Math.Log(6.4) / 27.0           ' step size in the log region

    If mel > min_log_mel Then
        Return min_log_hz * Math.Exp(logstep * (mel - min_log_mel))
    End If

    ' Linear region.
    Return f_min + f_sp * mel
End Function
' Centre frequencies of the non-negative FFT bins.
'
' sr     : sampling rate in Hz.
' n_fft  : FFT length.
' Returns: array of n_fft \ 2 + 1 values where element i is i * sr / n_fft.
'
' Integer division is used for the bin count. The previous "/" produced a
' Double that was rounded half-to-even when used as an array bound, which
' added a spurious trailing bin for some odd n_fft values (e.g. 7).
Public Function fft_frequencies(sr As Integer, n_fft As Integer) As Double()
    Dim lastBin As Integer = n_fft \ 2
    Dim fft_data(lastBin) As Double

    For i As Integer = 0 To lastBin
        ' Convert first so that i * sr cannot overflow Integer.
        fft_data(i) = CDbl(i) * sr / n_fft
    Next

    Return fft_data
End Function
' Reflect-pads a signal on both sides (streamlined variant).
'
' data   : input samples; must contain at least one element.
' num    : number of samples added on each side (>= 0).
' Returns: a Double() of data.Length + 2 * num elements. The edge samples
'          are not repeated: {1,2,3,4,5} padded by 4 becomes
'          {5,4,3,2, 1,2,3,4,5, 4,3,2,1}. When num is larger than
'          data.Length - 1 the reflection keeps bouncing between both ends.
'
' This function used to build an intermediate array, discard it and return
' Nothing; the `num` argument was ignored.
Public Function PadReflect2(data() As Double, num As Integer)
    If data Is Nothing Then
        Throw New ArgumentNullException("data")
    End If
    If data.Length = 0 Then
        Throw New ArgumentException("data must contain at least one sample", "data")
    End If
    If num < 0 Then
        Throw New ArgumentOutOfRangeException("num")
    End If

    Dim count As Integer = data.Length
    Dim padded(count + 2 * num - 1) As Double
    Array.Copy(data, 0, padded, num, count)

    ' Left side: walk away from data(0), bouncing at either end.
    Dim src As Integer = 0
    Dim direction As Integer = 1
    For target As Integer = num - 1 To 0 Step -1
        If count > 1 Then
            src = src + direction
            If src = count - 1 Then
                direction = -1
            ElseIf src = 0 Then
                direction = 1
            End If
        End If
        padded(target) = data(src)
    Next

    ' Right side: walk away from data(count - 1), bouncing at either end.
    src = count - 1
    direction = -1
    For target As Integer = num + count To padded.Length - 1
        If count > 1 Then
            src = src + direction
            If src = 0 Then
                direction = 1
            ElseIf src = count - 1 Then
                direction = -1
            End If
        End If
        padded(target) = data(src)
    Next

    Return padded
End Function
' Reflect-pads a signal on both sides.
'
' data   : input samples; must contain at least one element.
' num    : number of samples added on each side (>= 0).
' Returns: a Double() of data.Length + 2 * num elements. The edge samples
'          are not repeated: {1,2,3,4,5} padded by 4 becomes
'          {5,4,3,2, 1,2,3,4,5, 4,3,2,1}. When num is larger than
'          data.Length - 1 the reflection repeats with period
'          2 * (data.Length - 1).
'
' The right-hand buffer used to be declared with upper bound
' num + data.Length, so the result carried one extra trailing sample.
' Each output index is now mapped directly to its source index.
Public Function PadReflect(data() As Double, num As Integer)
    If data Is Nothing Then
        Throw New ArgumentNullException("data")
    End If
    If data.Length = 0 Then
        Throw New ArgumentException("data must contain at least one sample", "data")
    End If
    If num < 0 Then
        Throw New ArgumentOutOfRangeException("num")
    End If

    Dim count As Integer = data.Length
    Dim period As Integer = 2 * (count - 1)
    Dim result(count + 2 * num - 1) As Double

    For target As Integer = 0 To result.Length - 1
        Dim src As Integer = 0
        If period > 0 Then
            ' Position relative to data(0), folded into one reflection period.
            src = (((target - num) Mod period) + period) Mod period
            If src >= count Then
                src = period - src
            End If
        End If
        result(target) = data(src)
    Next

    Return result
End Function
' Fills `data` with repeated copies of `tim`.
'
' data : destination buffer, overwritten completely.
' tim  : pattern to repeat; must not be empty.
' left : True  -> the repetition is aligned so that `data` ends on the last
'                 element of `tim` (any partial copy sits at the start);
'        False -> the repetition starts with tim(0) (any partial copy sits
'                 at the end).
' No value is returned; the result is written into `data`.
Public Function _CopyDup(data() As Double, tim() As Double, Optional left As Boolean = True)
    Dim period As Integer = tim.Length
    Dim remainder As Integer = data.Length Mod period

    ' Index in `tim` that lands on data(0).
    Dim offset As Integer = 0
    If left AndAlso remainder > 0 Then
        offset = period - remainder
    End If

    For target As Integer = 0 To data.Length - 1
        data(target) = tim((offset + target) Mod period)
    Next

    Return Nothing
End Function
' Generic weighted sum-of-cosines window:
'   w(k) = sum over i of alpha(i) * cos(i * x(k)),
' with x(k) evenly spaced from -pi to +pi.
'
' M      : number of points in the returned window.
' alpha  : cosine coefficients.
' sym    : True  -> symmetric window (x spans M points);
'          False -> periodic window (x spans M + 1 points and the last
'                   point is dropped).
' Returns: array of exactly M values; empty for M <= 0, {1} for M = 1.
'
' The arrays used to be declared with upper bound M, so the result carried
' one (symmetric) or two (periodic) extra elements, the last one always 0,
' and M = 1 with sym = True divided by zero.
Public Function General_cosine(M As Integer, alpha As Double(), sym As Boolean) As Double()
    If M <= 0 Then
        Return New Double() {}
    End If
    If M = 1 Then
        Return New Double() {1.0}
    End If

    ' Number of points spanned by the underlying symmetric window.
    Dim span As Integer = M
    If Not sym Then
        span = M + 1
    End If

    Dim stepSize As Double = (2 * Math.PI) / (span - 1)
    Dim w(M - 1) As Double

    For k As Integer = 0 To M - 1
        Dim x As Double = -Math.PI + stepSize * k
        Dim total As Double = 0
        For i As Integer = 0 To alpha.Length - 1
            total = total + alpha(i) * Math.Cos(i * x)
        Next
        w(k) = total
    Next

    Return w
End Function
' Builds a periodic two-term cosine window of length parameter M.
'
' Both coefficients are 0.5, which are the coefficients of a Hann window
' rather than a Hamming window (0.54 / 0.46), despite the function name.
' The window is requested with sym = False from General_cosine.
'
' M      : window length passed through to General_cosine.
' Returns: whatever General_cosine returns for these coefficients.
Public Function General_hamming(M As Integer) As Double()
    Dim coefficients As Double() = {0.5, 0.5}
    Dim periodicWindow As Double() = General_cosine(M, coefficients, False)
    Return periodicWindow
End Function
' Returns the analysis window of length parameter M by delegating to
' General_hamming.
Public Function Get_window(M As Integer) As Double()
    Dim window As Double() = General_hamming(M)
    Return window
End Function
EndModule
ImportsSystem.IO
ImportsSystem.Numerics
ImportsTensorFlow
'Install-PackageTensorFlowSharp
PublicClassKeyWordDetect
' TensorFlow graph imported from the model file.
Dim graph As TFGraph
' Session bound to `graph`; used by Detected.
Dim session As TFSession

' Loads the model from the default location "f:\graph1.pb".
Public Sub New()
    Me.New("f:\graph1.pb")
End Sub

' Loads a frozen GraphDef from `modelPath`, imports it into a new graph
' with an empty prefix, and opens a session on that graph.
'
' modelPath : path of the .pb model file.
' Exceptions raised by File.ReadAllBytes (for example when the file is
' missing) are not caught here.
Public Sub New(modelPath As String)
    Dim model As Byte() = File.ReadAllBytes(modelPath)

    ' Import the GraphDef.
    graph = New TFGraph()
    graph.Import(model, "")
    session = New TFSession(graph)
End Sub
' Closes the TensorFlow session when the object is finalized.
'
' The session is Nothing when the constructor failed before creating it, so
' it is checked first: an exception escaping a finalizer terminates the
' process. The base finalizer is always invoked.
Protected Overrides Sub Finalize()
    Try
        If session IsNot Nothing Then
            session.CloseSession()
        End If
    Finally
        MyBase.Finalize()
    End Try
End Sub
' Converts raw bytes holding 16-bit samples into MFCC features.
'
' dataB  : byte buffer; every two bytes form one Int16 sample in the
'          machine's native byte order (Buffer.BlockCopy performs a raw
'          memory copy). A trailing odd byte is ignored.
' Returns: the result of MFCC for the decoded samples.
'
' The copy used to transfer dataB.Length - 1 bytes, which dropped the last
' byte and corrupted the final sample, and the sample count came from a
' rounded floating-point division.
Public Function DataBToMFCC(dataB() As Byte) As Double(,)
    Dim sampleCount As Integer = dataB.Length \ 2
    Dim buff16(sampleCount - 1) As Int16

    ' BlockCopy counts bytes, not elements.
    Buffer.BlockCopy(dataB, 0, buff16, 0, sampleCount * 2)

    Return MFCC(buff16)
End Function
' Converts 16-bit samples into MFCC features; forwards directly to MFCC.
Public Function DataToMFCC(dataI() As Int16) As Double(,)
    Return MFCC(dataI)
End Function
' Reshapes an MFCC matrix into the (1, 2, 130) layout passed to Detected.
'
' mfcc   : feature matrix; its elements are read row by row and written
'          consecutively, first into (0, 0, 0..129), then into (0, 1, 0..129).
'          A 20 x 13 matrix fills the result exactly; fewer elements leave
'          the remainder at zero.
' Returns: a 1 x 2 x 130 array.
' Throws : ArgumentNullException when mfcc is Nothing;
'          ArgumentException when mfcc holds more than 260 elements.
'
' The row switch used to fire only when the running index equalled 130
' exactly at the end of an input row, so other shapes either ran past the
' end of the array or silently overwrote the second row.
Public Function MFCCToVect(mfcc As Double(,)) As Double(,,)
    Const rowCount As Integer = 2
    Const rowLength As Integer = 130

    If mfcc Is Nothing Then
        Throw New ArgumentNullException("mfcc")
    End If
    If mfcc.Length > rowCount * rowLength Then
        Throw New ArgumentException("mfcc holds more than 260 values and does not fit the 1 x 2 x 130 input", "mfcc")
    End If

    Dim data(0, rowCount - 1, rowLength - 1) As Double
    Dim flat As Integer = 0

    For i As Integer = 0 To mfcc.GetLength(0) - 1
        For k As Integer = 0 To mfcc.GetLength(1) - 1
            data(0, flat \ rowLength, flat Mod rowLength) = mfcc(i, k)
            flat = flat + 1
        Next
    Next

    Return data
End Function
' Keyword detection: runs the loaded graph on one input tensor.
'
' Data   : input values fed to the graph operation named "input".
' Returns: element (0)(0) of the value fetched from the operation named
'          "out", converted to Double.
'
' The runner and the fetched tensors are now locals instead of untyped
' class-level fields, so concurrent calls no longer share scratch state,
' and the fetched tensors are disposed once the value has been read.
' NOTE(review): assumes TFTensor implements IDisposable - confirm against
' the referenced TensorFlowSharp version.
Public Function Detected(Data(,,) As Double) As Double
    Dim runner As TFSession.Runner = session.GetRunner()
    runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))

    Dim output As TFTensor() = runner.Run()
    Try
        ' GetValue(True) yields a jagged array; it is indexed late-bound
        ' because the element type depends on the model.
        Dim values As Object = output(0).GetValue(True)
        Dim rt As Double = values(0)(0)
        Return rt
    Finally
        For Each tensor As TFTensor In output
            If tensor IsNot Nothing Then
                tensor.Dispose()
            End If
        Next
    End Try
End Function
'PublicFunctionRunB(dataB()AsByte)
'DimmfccdAsDouble(,)=DataBToMFCC(dataB)
'DiminputxAsDouble(,,)=MFCCToVect(mfccd)
'Detected(inputx)
'EndFunction
'PublicFunctionThreadPoolRun(dataI()AsInt16)
'Threading.ThreadPool.QueueUserWorkItem(Run(dataI),dataI)
''Dimthrd1AsNewThreading.Thread(NewThreading.ParameterizedThreadStart(AddressOfRun))
''thrd1.Start(dataI)
'EndFunction
'DelegateFunctionDelgRun(dataI()AsInt16)
'PublicFunctionThreadRun(dataI()AsInt16)
''DimdrunAsNewDelgRun(AddressOfRun)
'Dimthrd1AsNewThreading.Thread(NewThreading.ParameterizedThreadStart(AddressOfRun))
'thrd1.Start(dataI)
'EndFunction
' Full pipeline for one block of 16-bit samples:
' samples -> MFCC matrix -> model input layout -> detection score.
'
' dataI  : input samples.
' Returns: the value returned by Detected.
Public Function Run(dataI() As Int16) As Double
    Dim features As Double(,) = DataToMFCC(dataI)
    Dim modelInput As Double(,,) = MFCCToVect(features)
    Dim score As Double = Detected(modelInput)
    Return score
End Function
' Computes MFCC features for a block of 16-bit samples.
'
' Steps, in order:
'   1. scale every sample by 1 / 32768;
'   2. build the 2048-point analysis window (Get_window);
'   3. reflect-pad the signal by 1024 samples on each side (PadReflect);
'   4. split into frames with Frame2's defaults (2048 samples, hop 512);
'   5. multiply every frame by the window;
'   6. FFT each frame and take the power spectrum;
'   7. build the mel filter bank from MelFilter / diff / ramps / weight /
'      normal using the bin frequencies for 16000 Hz and n_fft = 2048;
'   8. apply the filter bank, convert to dB, and project onto a
'      20 x 128 DCT basis.
'
' buff16 : input samples.
' Returns: the matrix produced by _mfcc (20 rows, one column per frame).
Public Function MFCC(buff16() As Int16) As Double(,)
    Const frameLength As Integer = 2048

    ' 1. Normalise the samples.
    Dim samples(buff16.Length - 1) As Double
    For idx As Integer = 0 To buff16.Length - 1
        samples(idx) = buff16(idx) / 32768
    Next

    ' 2. Analysis window.
    Dim window As Double() = Get_window(frameLength)

    ' 3. Reflect padding.
    Dim y As Double() = PadReflect(samples, 1024)

    ' 4. Framing.
    Dim frames As Double(,) = Frame2(y)
    Dim frameCount As Integer = frames.GetLength(0)

    ' 5. Windowing.
    Dim windowed(frameCount - 1, frameLength - 1) As Double
    For frameIndex As Integer = 0 To frameCount - 1
        For offset As Integer = 0 To frameLength - 1
            windowed(frameIndex, offset) = frames(frameIndex, offset) * window(offset)
        Next
    Next

    ' 6. Power spectrum.
    Dim complexSpectrum(,) As Complex = FFT(windowed)
    Dim powerSpectrum As Double(,) = spectrum(complexSpectrum)

    ' 7. Mel filter bank.
    Dim binFrequencies() As Double = fft_frequencies(16000, 2048)
    Dim melEdges As Double() = MelFilter()
    Dim edgeSpacing As Double() = diff(melEdges)
    Dim distance As Double(,) = ramps(melEdges, binFrequencies)
    Dim filterBank(,) As Double = weight(distance, edgeSpacing)
    normal(melEdges, filterBank)   ' scales filterBank in place

    ' 8. Mel spectrogram -> dB -> DCT.
    Dim melPower(,) As Double = melspectrogram(filterBank, powerSpectrum)
    Dim melDb As Double(,) = power_to_db(melPower)
    Dim dctBasis As Double(,) = Dct(20, 128)
    Return _mfcc(dctBasis, melDb)
End Function
EndClass
以上这篇对python中Librosa的mfcc步骤详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。