基于Java实现DFA算法的代码实例
DFA简介
DFA 全称为 Deterministic Finite Automaton,即确定有穷自动机:在任一状态下,对每个输入字符,其后继状态都是唯一确定的。用于敏感词过滤时,把词库构造成以字符为键的嵌套 Map(类似前缀树),然后对待检测文本逐字符查表即可完成匹配。
直接代码:
敏感词实体类
packagecom.nopsmile.dfa; publicclassKeywords{ privateStringpid; privateStringContent; publicKeywords(){ } publicKeywords(Stringcontent){ super(); Content=content; } publicStringgetContent(){ returnContent; } publicvoidsetContent(Stringcontent){ Content=content; } publicStringgetPid(){ returnpid; } publicvoidsetPid(Stringpid){ this.pid=pid; } }
敏感词库初始化
package com.nopsmile.dfa;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Builds the sensitive-word library used for DFA-style matching.
 *
 * <p>The library is a tree of nested maps. In every node, a {@link Character}
 * key maps to the child node for that character, and the {@link String} key
 * {@code "isEnd"} maps to {@code "1"} when a word ends at that node and to
 * {@code "0"} otherwise. The root map itself carries no {@code "isEnd"} entry.
 */
public class SensitiveWordInit {

    /** Root of the word library; replaced on every call to {@link #initKeyWord(List)}. */
    public HashMap<Object, Object> sensitiveWordMap;

    /**
     * Rebuilds the word library from the given entries.
     *
     * <p>Each word is trimmed before insertion. A {@code null} list, {@code null}
     * entries, entries without content and words that are empty after trimming
     * are skipped, so a partially filled list still yields a usable library.
     *
     * @param sensitiveWords entries whose content should be treated as sensitive
     * @return the root of the rebuilt library (the same object as
     *         {@link #sensitiveWordMap}); never {@code null}
     */
    public Map<Object, Object> initKeyWord(List<Keywords> sensitiveWords) {
        // Collect into a set first so duplicate words are inserted only once.
        Set<String> keyWordSet = new HashSet<String>();
        if (sensitiveWords != null) {
            for (Keywords entry : sensitiveWords) {
                if (entry == null || entry.getContent() == null) {
                    continue;
                }
                String word = entry.getContent().trim();
                if (word.length() > 0) {
                    keyWordSet.add(word);
                }
            }
        }
        addSensitiveWordToHashMap(keyWordSet);
        return sensitiveWordMap;
    }

    /**
     * Inserts every word of the set into a fresh nested-map tree and stores the
     * root in {@link #sensitiveWordMap}.
     *
     * @param keyWordSet trimmed, non-null words to insert
     */
    @SuppressWarnings("unchecked") // every non-"isEnd" value in a node is a child node created below
    private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        // Size hint only; the root holds one entry per distinct first character.
        sensitiveWordMap = new HashMap<Object, Object>(keyWordSet.size());

        for (String key : keyWordSet) {
            // nowMap aliases a node inside the tree, so writes through it
            // modify the library reachable from sensitiveWordMap.
            Map<Object, Object> nowMap = sensitiveWordMap;

            for (int i = 0; i < key.length(); i++) {
                char keyChar = key.charAt(i);
                Object child = nowMap.get(keyChar);

                if (child != null) {
                    // Shared prefix with an earlier word: descend into the existing node.
                    nowMap = (Map<Object, Object>) child;
                } else {
                    // First time this character is seen at this position: create its node.
                    Map<Object, Object> newWordMap = new HashMap<Object, Object>();
                    newWordMap.put("isEnd", "0");
                    nowMap.put(keyChar, newWordMap);
                    nowMap = newWordMap;
                }

                // The node reached by the last character marks the end of a word.
                if (i == key.length() - 1) {
                    nowMap.put("isEnd", "1");
                }
            }
        }
    }
}
自定义的工具类
packagecom.nopsmile.dfa; importjava.util.ArrayList; importjava.util.Collections; importjava.util.Comparator; importjava.util.HashMap; importjava.util.HashSet; importjava.util.Iterator; importjava.util.LinkedHashMap; importjava.util.LinkedList; importjava.util.List; importjava.util.Map; importjava.util.Set; importcom.alibaba.fastjson.JSONArray; importnet.sf.json.JSONObject; /** *敏感词过滤工具类 * *@authorAlanLee * */ publicclassSensitivewordUtils{ /** *敏感词库 */ publicstaticMapsensitiveWordMap=null; /** *只过滤最小敏感词 */ publicstaticintminMatchTYpe=1; /** *过滤所有敏感词 */ publicstaticintmaxMatchType=2; /** *敏感词库敏感词数量 * *@return */ publicstaticintgetWordSize(){ if(SensitivewordUtils.sensitiveWordMap==null){ return0; } returnSensitivewordUtils.sensitiveWordMap.size(); } /** *是否包含敏感词 * */ publicstaticbooleanisContaintSensitiveWord(Stringtxt,intmatchType){ booleanflag=false; for(inti=0;i0){ flag=true; } } returnflag; } /** *获取敏感词内容 * *@paramtxt *@parammatchType *@return敏感词内容 */ publicstaticSet getSensitiveWord(Stringtxt,intmatchType){ Set sensitiveWordList=newHashSet (); for(inti=0;i 0){ //将检测出的敏感词保存到集合中 sensitiveWordList.add(txt.substring(i,i+length)); i=i+length-1; } } returnsensitiveWordList; } /** *替换敏感词 * */ publicstaticStringreplaceSensitiveWord(Stringtxt,intmatchType,StringreplaceChar){ StringresultTxt=txt; Set set=getSensitiveWord(txt,matchType); Iterator iterator=set.iterator(); Stringword=null; StringreplaceString=null; while(iterator.hasNext()){ word=iterator.next(); replaceString=getReplaceChars(replaceChar,word.length()); resultTxt=resultTxt.replaceAll(word,replaceString); } returnresultTxt; } /** *替换敏感词内容 * */ privatestaticStringgetReplaceChars(StringreplaceChar,intlength){ StringresultReplace=replaceChar; for(inti=1;i map=newHashMap (); for(inti=0;i 0){ //将检测出的敏感词保存到集合中 Stringstr=txt.substring(i,i+length); if(map.containsKey(str)){ map.put(str,map.get(str).intValue()+1); }else{ map.put(str,newInteger(1)); } //System.out.println(txt.substring(i,i+length)); i=i+length-1; } } 
returnmap; } /** *对map数组value排序,并取前10 *thismethodwillalwayssortthemap; *isConditionistrueconditioncanbeusedotherwiseinvalid *@paramunsortMap *@return */ publicstaticMap sortByValue(Map unsortMap,intcondition,booleanisCondition){ //1.ConvertMaptoListofMap List >list= newLinkedList >(unsortMap.entrySet()); //2.SortlistwithCollections.sort(),provideacustomComparator //Tryswitchtheo1o2positionforadifferentorder Collections.sort(list,newComparator >(){ publicintcompare(Map.Entry o1, Map.Entry o2){ return(o2.getValue()).compareTo(o1.getValue()); } }); //3.LoopthesortedlistandputitintoanewinsertionorderMapLinkedHashMap Map sortedMap=newLinkedHashMap (); if(isCondition){ for(inti=0;i 使用上面类流程代码
Keywordsss=newKeywords("好"); Listlist=newArrayList(); list.add(ss); SensitiveWordInitsensitiveWordInit=newSensitiveWordInit(); MapsensitiveWordMap=sensitiveWordInit.initKeyWord(list); //传入SensitivewordEngine类中的敏感词库 SensitivewordUtils.sensitiveWordMap=sensitiveWordMap; SensitivewordUtils.getSensitiveWordSum("需要检测的文本",2);以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。