Java中的StringBuilder性能测试
在看KMP算法时,想要简单的统计一下执行时间和性能。
得出的结论是:Java的String的indexOf方法性能最好,其次是KMP算法,其次是传统的BF算法,当然,对比有点牵强,SUN的算法也使用Java来实现、用的看着不像是KMP,还需要详细研究一下。
测试代码如下所示:
packagecom.test.test.kmp;
importjava.util.Random;
publicclassKMPTest{
publicstaticvoidmain(String[]args){
test();
}
//
publicstaticvoidtest(){
//
intallLen=8000000;
intsubLen=11;
intcharLen=4;
Stringcl=buildString(charLen);
inttimes=50;
//
testMatch(allLen,subLen,cl,times);
}
publicstaticvoidtestMatch(intallLen,intsubLen,Stringcl,inttimes){
intallBF=0;
intallString=0;
intallKMP=0;
intallGC=0;
intallbuild=0;
intalltoArray=0;
longstart=System.currentTimeMillis();
System.out.println("--------------------------");
for(inti=0;i<times;i++){
//1.构造字符串的损耗
longbuildStart=System.currentTimeMillis();
Stringsub=buildString(subLen,cl);
Stringall=buildString(allLen,sub)+sub;
longbuildEnd=System.currentTimeMillis();
allbuild+=(buildEnd-buildStart);
//2.toCharArray的损耗
longtoArrayStart=System.currentTimeMillis();
char[]allArray=all.toCharArray();
char[]subArray=sub.toCharArray();
longtoArrayEnd=System.currentTimeMillis();
alltoArray+=(toArrayEnd-toArrayStart);
//
longstartBF=System.currentTimeMillis();
intindexBF=bfIndexOf(all,sub);
longendBF=System.currentTimeMillis();
//
longtimeoutBF=endBF-startBF;
allBF+=timeoutBF;
System.gc();
allGC+=(System.currentTimeMillis()-endBF);
//
longstartString=System.currentTimeMillis();
intindexString=stringIndexOf(all,sub);
longendString=System.currentTimeMillis();
//
longtimeoutString=endString-startString;
allString+=timeoutString;
System.gc();
allGC+=(System.currentTimeMillis()-endString);
//
longstartKMP=System.currentTimeMillis();
//intindexKMP=kmpIndexOf(all,sub);
intindexKMP=KMP_Index(allArray,subArray);
longendKMP=System.currentTimeMillis();
//
longtimeoutKMP=endKMP-startKMP;
allKMP+=timeoutKMP;
System.gc();
allGC+=(System.currentTimeMillis()-endKMP);
//
//System.out.println("all="+all.substring(0,100)+"......");
//System.out.println("sub="+sub);
//
//System.out.println("indexBF="+indexBF+";耗时:"+timeoutBF+"ms");
//System.out.println("indexString="+indexString+";耗时:"+timeoutString+"ms");
if(indexBF==indexString&&indexKMP==indexString){
//System.out.println("!!!!对比相等。");
}else{
thrownewRuntimeException("对比出错.");
}
//
if(i%20==10){
System.out.println(i+"次bfIndexOf="+allBF+"ms");
System.out.println(i+"次stringIndexOf="+allString+"ms");
System.out.println(i+"次KMPIndexOf="+allKMP+"ms");
System.out.println(i+"次allbuild="+allbuild+"ms");
System.out.println(i+"次alltoArray="+alltoArray+"ms");
System.out.println(i+"*3次,GC="+allGC+"ms");
longend=System.currentTimeMillis();
longallTime=end-start;
longlossTime=allTime-(allBF+allString+allKMP+allGC);
System.out.println(i+"次,所有总计耗时:"+(allTime)+"ms;损耗:"+lossTime+"ms;损耗比:"+((0.0+lossTime)/allTime*100)+"%");
System.out.println("--------------------------");
}
}
//
System.out.println(times+"次bfIndexOf;总计耗时:"+allBF+"ms");
System.out.println(times+"次stringIndexOf;总计耗时:"+allString+"ms");
System.out.println(times+"次KMPIndexOf;总计耗时:"+allKMP+"ms");
System.out.println(times+"次allbuild="+allbuild+"ms");
System.out.println(times+"次alltoArray="+alltoArray+"ms");
System.out.println(times+"*3次,GC;总计耗时:"+allGC+"ms");
longend=System.currentTimeMillis();
longallTime=end-start;
longlossTime=allTime-(allBF+allString+allKMP+allGC);
System.out.println(times+"次,所有总计耗时:"+(allTime)+"ms;损耗:"+lossTime+"ms;损耗比:"+((0.0+lossTime)/allTime*100)+"%");
System.out.println("--------------------------");
}
//
//构建字符
publicstaticStringbuildString(intlen){
returnbuildString(len,null);
}
publicstaticStringbuildString(intlen,Stringsub){
//
finalintTYPE_NUMBER=0;
finalintTYPE_LOWERCASE=1;
finalintTYPE_UPPERCASE=2;
//
longseed=System.nanoTime();
Randomrandom=newRandom(seed);
//
//
StringBuilderbuilder=newStringBuilder();
for(inti=0;i<len;i++){
//
inttype=random.nextInt(3);//0-2
intcvalue=0;
if(TYPE_NUMBER==type){
cvalue='0'+random.nextInt(10);//0~(n-1)
}elseif(TYPE_LOWERCASE==type){
cvalue='a'+random.nextInt(26);//0~(n-1)
}elseif(TYPE_UPPERCASE==type){
cvalue='A'+random.nextInt(26);//0~(n-1)
}else{
thrownewRuntimeException(Random.class.getName()+"#newxtInt(int);Wrong!");
}
//
//cvalue='A'+random.nextInt(26);//0~(n-1)
//
charc=(char)cvalue;
if(null!=sub&&sub.length()>1){
intindex=random.nextInt(sub.length());
c=sub.charAt(index);
}
//Strings=String.valueOf(c);
builder.append(c);
//
}
//
returnbuilder.toString();
}
/**
*Java自带的方法,内部实现了
*@paramall
*@paramsub
*@return
*/
publicstaticintstringIndexOf(Stringall,Stringsub){
//防御式编程
if(null==all||null==sub){
return-1;
}
//调用Java的String方法
returnall.indexOf(sub);
}
/**
*BF算法
*@paramall
*@paramsub
*@return
*/
publicstaticintbfIndexOf(Stringall,Stringsub){
//防御式编程
if(null==all||null==sub){
return-1;
}
//
intlenAll=all.length();
intlenSub=sub.length();
inti=0;
while(i<lenAll){
//从i下标开始对比
intj=0;
while(j<lenSub){
//
charall_i=all.charAt(i);
charsub_j=sub.charAt(j);
if(all_i==sub_j){
//相等则继续对比下一个字符
i++;
j++;//这个增长很重要
}else{
//否则跳出内层循环
break;
}
}
//如果j增长到和lenSub相等,则匹配成功
if(lenSub==j){
returni-lenSub;
}else{
i=1+i-j;//回溯i
}
}
//
return-1;
}
/**
*KMP算法
*@paramall
*@paramsub
*@return
*/
publicstaticintkmpIndexOf(Stringall,Stringsub){
//
char[]allArray=all.toCharArray();
char[]subArray=sub.toCharArray();
//
returnKMP_Index(allArray,subArray);
}
/**
*获得字符串的next函数值
*
*@paramt
*字符串
*@returnnext函数值
*/
publicstaticint[]next(char[]t){
int[]next=newint[t.length];
next[0]=-1;
inti=0;
intj=-1;
while(i<t.length-1){
if(j==-1||t[i]==t[j]){
i++;
j++;
if(t[i]!=t[j]){
next[i]=j;
}else{
next[i]=next[j];
}
}else{
j=next[j];
}
}
returnnext;
}
/**
*KMP匹配字符串
*
*@paramallArray
*主串
*@paramsubArray
*模式串
*@return若匹配成功,返回下标,否则返回-1
*/
publicstaticintKMP_Index(char[]allArray,char[]subArray){
int[]next=next(subArray);
inti=0;
intj=0;
while(i<=allArray.length-1&&j<=subArray.length-1){
if(j==-1||allArray[i]==subArray[j]){
i++;
j++;
}else{
j=next[j];
}
}
if(j<subArray.length){
return-1;
}else
returni-subArray.length;//返回模式串在主串中的头下标
}
}
测试的一个结果如下所示:
-------------------------- 10次bfIndexOf=94ms 10次stringIndexOf=56ms 10次KMPIndexOf=76ms 10次allbuild=5870ms 10次alltoArray=137ms 10*3次,GC=155ms 10次,所有总计耗时:6388ms;损耗:6007ms;损耗比:94.03569192235442% -------------------------- 30次bfIndexOf=365ms 30次stringIndexOf=222ms 30次KMPIndexOf=295ms 30次allbuild=16452ms 30次alltoArray=395ms 30*3次,GC=452ms 30次,所有总计耗时:18184ms;损耗:16850ms;损耗比:92.66388033435987% -------------------------- 50次bfIndexOf;总计耗时:550ms 50次stringIndexOf;总计耗时:335ms 50次KMPIndexOf;总计耗时:450ms 50次allbuild=26501ms 50次alltoArray=637ms 50*3次,GC;总计耗时:734ms 50次,所有总计耗时:29211ms;损耗:27142ms;损耗比:92.91705179555647% --------------------------
从中可以看出来,使用StringBuilder构造字符串,800万*50次append大约消耗了26秒。换算下来每秒钟是1600万次。。。
看来是需要写一个专门计时的函数,本来Junit是有自己的统计的,但是样本不太好做。
如此看来,数据的准备,也就是测试用例的选取很关键,可能需要先生成并写入到文本文件中。