基于 Java 实现 DFA 算法的代码实例
DFA简介
DFA 全称为 Deterministic Finite Automaton,即确定有穷自动机:从当前状态出发,每读入一个字符都只会转移到唯一确定的下一个状态。下文用嵌套的 Map 把敏感词库组织成这样的状态树,再对待检测文本逐字匹配。
直接代码:
敏感词实体类
packagecom.nopsmile.dfa;
/**
 * Entity describing one sensitive word.
 *
 * <p>Plain mutable bean: both fields start as {@code null} unless set through the
 * one-argument constructor or the setters.
 */
public class Keywords {

    /** Identifier value; this class only stores and returns it. */
    private String pid;

    /** Text of the sensitive word. */
    private String content;

    /** Creates an entry with both fields left {@code null}. */
    public Keywords() {
    }

    /**
     * Creates an entry for the given word.
     *
     * @param content text of the sensitive word
     */
    public Keywords(String content) {
        this.content = content;
    }

    /**
     * @return the text of the sensitive word, or {@code null} if none was set
     */
    public String getContent() {
        return content;
    }

    /**
     * @param content new text of the sensitive word
     */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * @return the stored identifier, or {@code null} if none was set
     */
    public String getPid() {
        return pid;
    }

    /**
     * @param pid new identifier value
     */
    public void setPid(String pid) {
        this.pid = pid;
    }
}
敏感词库初始化
packagecom.nopsmile.dfa;
importjava.util.HashMap;
importjava.util.HashSet;
importjava.util.Iterator;
importjava.util.List;
importjava.util.Map;
importjava.util.Set;
/**
*敏感词库初始化
*
*/
publicclassSensitiveWordInit{
/**
*敏感词库
*/
publicHashMapsensitiveWordMap;
/**
*初始化敏感词keywords
*/
publicMapinitKeyWord(ListsensitiveWords){
try{
//从敏感词集合对象中取出敏感词并封装到Set集合中
SetkeyWordSet=newHashSet();
for(Keywordss:sensitiveWords){
keyWordSet.add(s.getContent().trim());
}
//将敏感词库加入到HashMap中
addSensitiveWordToHashMap(keyWordSet);
}catch(Exceptione){
e.printStackTrace();
}
returnsensitiveWordMap;
}
/**
*封装敏感词库
*/
privatevoidaddSensitiveWordToHashMap(SetkeyWordSet){
//初始化HashMap对象并控制容器的大小
sensitiveWordMap=newHashMap(keyWordSet.size());
//敏感词
Stringkey=null;
//用来按照相应的格式保存敏感词库数据
MapnowMap=null;
//用来辅助构建敏感词库
MapnewWorMap=null;
//使用一个迭代器来循环敏感词集合
Iteratoriterator=keyWordSet.iterator();
while(iterator.hasNext()){
key=iterator.next();
//等于敏感词库,HashMap对象在内存中占用的是同一个地址,所以此nowMap对象的变化,sensitiveWordMap对象也会跟着改变
nowMap=sensitiveWordMap;
for(inti=0;i();
newWorMap.put("isEnd","0");
nowMap.put(keyChar,newWorMap);
nowMap=newWorMap;
}
//如果该字是当前敏感词的最后一个字,则标识为结尾字
if(i==key.length()-1){
nowMap.put("isEnd","1");
}
}
}
}
}
自定义的工具类
packagecom.nopsmile.dfa;
importjava.util.ArrayList;
importjava.util.Collections;
importjava.util.Comparator;
importjava.util.HashMap;
importjava.util.HashSet;
importjava.util.Iterator;
importjava.util.LinkedHashMap;
importjava.util.LinkedList;
importjava.util.List;
importjava.util.Map;
importjava.util.Set;
importcom.alibaba.fastjson.JSONArray;
importnet.sf.json.JSONObject;
/**
*敏感词过滤工具类
*
*@authorAlanLee
*
*/
publicclassSensitivewordUtils{
/**
*敏感词库
*/
publicstaticMapsensitiveWordMap=null;
/**
*只过滤最小敏感词
*/
publicstaticintminMatchTYpe=1;
/**
*过滤所有敏感词
*/
publicstaticintmaxMatchType=2;
/**
*敏感词库敏感词数量
*
*@return
*/
publicstaticintgetWordSize(){
if(SensitivewordUtils.sensitiveWordMap==null){
return0;
}
returnSensitivewordUtils.sensitiveWordMap.size();
}
/**
*是否包含敏感词
*
*/
publicstaticbooleanisContaintSensitiveWord(Stringtxt,intmatchType){
booleanflag=false;
for(inti=0;i0){
flag=true;
}
}
returnflag;
}
/**
*获取敏感词内容
*
*@paramtxt
*@parammatchType
*@return敏感词内容
*/
publicstaticSetgetSensitiveWord(Stringtxt,intmatchType){
SetsensitiveWordList=newHashSet();
for(inti=0;i0){
//将检测出的敏感词保存到集合中
sensitiveWordList.add(txt.substring(i,i+length));
i=i+length-1;
}
}
returnsensitiveWordList;
}
/**
*替换敏感词
*
*/
publicstaticStringreplaceSensitiveWord(Stringtxt,intmatchType,StringreplaceChar){
StringresultTxt=txt;
Setset=getSensitiveWord(txt,matchType);
Iteratoriterator=set.iterator();
Stringword=null;
StringreplaceString=null;
while(iterator.hasNext()){
word=iterator.next();
replaceString=getReplaceChars(replaceChar,word.length());
resultTxt=resultTxt.replaceAll(word,replaceString);
}
returnresultTxt;
}
/**
*替换敏感词内容
*
*/
privatestaticStringgetReplaceChars(StringreplaceChar,intlength){
StringresultReplace=replaceChar;
for(inti=1;imap=newHashMap();
for(inti=0;i0){
//将检测出的敏感词保存到集合中
Stringstr=txt.substring(i,i+length);
if(map.containsKey(str)){
map.put(str,map.get(str).intValue()+1);
}else{
map.put(str,newInteger(1));
}
//System.out.println(txt.substring(i,i+length));
i=i+length-1;
}
}
returnmap;
}
/**
*对map数组value排序,并取前10
*thismethodwillalwayssortthemap;
*isConditionistrueconditioncanbeusedotherwiseinvalid
*@paramunsortMap
*@return
*/
publicstaticMapsortByValue(MapunsortMap,intcondition,booleanisCondition){
//1.ConvertMaptoListofMap
List>list=
newLinkedList>(unsortMap.entrySet());
//2.SortlistwithCollections.sort(),provideacustomComparator
//Tryswitchtheo1o2positionforadifferentorder
Collections.sort(list,newComparator>(){
publicintcompare(Map.Entryo1,
Map.Entryo2){
return(o2.getValue()).compareTo(o1.getValue());
}
});
//3.LoopthesortedlistandputitintoanewinsertionorderMapLinkedHashMap
MapsortedMap=newLinkedHashMap();
if(isCondition){
for(inti=0;i
使用上述各类的完整流程示例:
Keywordsss=newKeywords("好");
Listlist=newArrayList();
list.add(ss);
SensitiveWordInitsensitiveWordInit=newSensitiveWordInit();
MapsensitiveWordMap=sensitiveWordInit.initKeyWord(list);
//传入SensitivewordEngine类中的敏感词库
SensitivewordUtils.sensitiveWordMap=sensitiveWordMap;
SensitivewordUtils.getSensitiveWordSum("需要检测的文本",2);
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。