Python 正向最大匹配分词和逆向最大匹配分词的实例
正向最大匹配
# -*- coding: utf-8 -*-
CODEC = 'utf-8'


def u(s, encoding):
    """Return *s* as a text (unicode) string.

    Text input is returned unchanged; byte strings are decoded with
    *encoding*.  The text type is derived from a ``u''`` literal instead of
    the Python 2-only ``unicode`` builtin, so the function runs on both
    Python 2 and Python 3.

    Args:
        s: a text string, ``bytes`` or ``bytearray``.
        encoding: codec name used to decode byte input, e.g. ``'utf-8'``.

    Returns:
        The text form of *s*.

    Raises:
        TypeError: if *s* is neither text nor a byte string.
        UnicodeDecodeError: if the bytes are not valid in *encoding*.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(s, text_type):
        return s
    if isinstance(s, (bytes, bytearray)):
        return s.decode(encoding)
    raise TypeError('expected text or bytes, got %s' % type(s).__name__)
def fwd_mm_seg(wordDict, maxLen, str):
    """Segment *str* with forward maximum matching.

    Scanning from the left, the longest prefix of at most *maxLen*
    characters that is found in *wordDict* becomes the next segment.  When
    no candidate of two or more characters matches, a single character is
    emitted.  The scan then continues after that segment.

    Args:
        wordDict: container of known words; only ``in`` is used on it.
        maxLen: maximum candidate length in characters; must be >= 1.
        str: text to segment.  The name shadows the builtin but is kept so
            existing keyword callers continue to work.

    Returns:
        List of segments in text order; joined together they equal *str*.

    Raises:
        ValueError: if *maxLen* is smaller than 1.
    """
    # A window of zero or negative width never consumes any input, which
    # would loop forever while growing the result list.
    if maxLen < 1:
        raise ValueError('maxLen must be >= 1, got %r' % (maxLen,))

    wordList = []
    total = len(str)
    pos = 0
    while pos < total:
        # Never look past the end of the text.
        wordLen = min(maxLen, total - pos)
        # Shrink the window from the right until it is a known word or a
        # single character remains.
        while wordLen > 1 and str[pos:pos + wordLen] not in wordDict:
            wordLen -= 1
        wordList.append(str[pos:pos + wordLen])
        pos += wordLen
    return wordList
def main():
    """Demo: load ``words.dic`` and segment a sample sentence forwards.

    ``words.dic`` is opened relative to the current working directory and is
    read as one UTF-8 encoded word per line.  The sample text and the
    ``==``-joined segmentation are printed to standard output.
    """
    wordDict = {}
    # Binary mode + explicit decode via u() behaves the same on Python 2 and
    # Python 3; ``with`` guarantees the file handle is closed.
    with open('words.dic', 'rb') as fp_dict:
        for eachWord in fp_dict:
            wordDict[u(eachWord.strip(), 'utf-8')] = 1
    segStr = u'你好世界helloworld'
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(segStr)
    wordList = fwd_mm_seg(wordDict, 10, segStr)
    print("==".join(wordList))


if __name__ == '__main__':
    main()
逆向最大匹配
# -*- coding: utf-8 -*-
def u(s, encoding):
    """Return *s* as a text (unicode) string.

    Text input is returned unchanged; byte strings are decoded with
    *encoding*.  The text type is derived from a ``u''`` literal instead of
    the Python 2-only ``unicode`` builtin, so the function runs on both
    Python 2 and Python 3.

    Args:
        s: a text string, ``bytes`` or ``bytearray``.
        encoding: codec name used to decode byte input, e.g. ``'utf-8'``.

    Returns:
        The text form of *s*.

    Raises:
        TypeError: if *s* is neither text nor a byte string.
        UnicodeDecodeError: if the bytes are not valid in *encoding*.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(s, text_type):
        return s
    if isinstance(s, (bytes, bytearray)):
        return s.decode(encoding)
    raise TypeError('expected text or bytes, got %s' % type(s).__name__)


CODEC = 'utf-8'
def bwd_mm_seg(wordDict, maxLen, str):
    """Segment *str* with backward (reverse) maximum matching.

    Scanning from the right, the longest suffix of at most *maxLen*
    characters that is found in *wordDict* becomes the next segment.  When
    no candidate of two or more characters matches, a single character is
    emitted.  The scan then continues to the left of that segment.

    Args:
        wordDict: container of known words; only ``in`` is used on it.
        maxLen: maximum candidate length in characters; must be >= 1.
        str: text to segment.  The name shadows the builtin but is kept so
            existing keyword callers continue to work.

    Returns:
        List of segments in text order (left to right); joined together
        they equal *str*.

    Raises:
        ValueError: if *maxLen* is smaller than 1.
    """
    # A window of zero or negative width never consumes any input, which
    # would loop forever while growing the result list.
    if maxLen < 1:
        raise ValueError('maxLen must be >= 1, got %r' % (maxLen,))

    wordList = []
    end = len(str)
    while end > 0:
        # Never look past the start of the text.
        wordLen = min(maxLen, end)
        # Shrink the window from the left until it is a known word or a
        # single character remains.
        while wordLen > 1 and str[end - wordLen:end] not in wordDict:
            wordLen -= 1
        wordList.append(str[end - wordLen:end])
        end -= wordLen
    # Segments were collected right-to-left; restore reading order.
    wordList.reverse()
    return wordList
def main():
    """Demo: load ``words.dic`` and segment a sample sentence backwards.

    ``words.dic`` is opened relative to the current working directory and is
    read as one UTF-8 encoded word per line.  The sample text and the
    ``==``-joined segmentation are printed to standard output.
    """
    wordDict = {}
    # Binary mode + explicit decode via u() behaves the same on Python 2 and
    # Python 3; ``with`` guarantees the file handle is closed.
    with open('words.dic', 'rb') as fp_dict:
        for eachWord in fp_dict:
            wordDict[u(eachWord.strip(), 'utf-8')] = 1
    # Plain u'' literal: the ``ur`` prefix is a SyntaxError on Python 3 and
    # the text contains no backslashes, so the value is unchanged.
    segStr = u'你好世界helloworld'
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(segStr)
    wordList = bwd_mm_seg(wordDict, 10, segStr)
    print("==".join(wordList))


if __name__ == '__main__':
    main()
以上这篇 Python 正向最大匹配分词和逆向最大匹配分词的实例就是小编分享给大家的全部内容了，希望能给大家一个参考，也希望大家多多支持毛票票。