开赛算法
Kasai的算法用于从后缀数组中获取最长公共前缀(LCP)数组。首先找到后缀数组。之后,Kasai的算法采用后缀数组列表来查找LCP。
对于LCP数组,它的取值为O(mlogn),其中m为模式长度,n为文本长度。Kasai算法使用O(n)来搜索主字符串中的模式。
输入输出
Input: Main String: “banana” Output: Suffix Array : 5 3 1 0 4 2 Common Prefix Array : 1 3 0 0 2 0
算法
buildSuffixArray(文本)
输入:主字符串
输出:后缀数组,从正文开始构建
Begin
n := size of text
for i := 0 to n, do
suffArray[i].index := i
suffArray[i].rank[0] := text[i]
if (i+1) < n, then
suffArray[i].rank[1] := text[i+1]
else
suffArray[i].rank[1] := -1
do
sort the suffix array
define index array to store indexes
for k := 4 to (2*n)-1, increase k by k*2, do
currRank := 0
prevRank := suffArray[0].rank[0]
suffArray[0].rank[0] := currRank
index[suffArray[0].index] = 0
for all character index i of text, do
if suffArray[i].rank[0] = prevRank AND suffArray[i].rank[1] =
suffArray[i-1].rank[1], then
prevRank := suffArray[i].rank[0]
suffArray[i].rank[0] := currRank
else
prevRank := suffArray[i].rank[0]
suffArray[i].rank[0] := currRank + 1
currRank := currRank + 1
index[suffArray[i].index] := i
done
for all character index i of text, do
nextIndex := suffArray[i].index + k/2
if nextIndex< n, then
suffArray[i].rank[1] := suffArray[index[nextIndex]].rank[0]
else
suffArray[i].rank[1] := -1
done
sort the suffArray
done
for all character index i of text, do
insert suffArray[i].index into suffVector
done
EndkasaiAlgorithm(text,suffVector)
输入- 主文本和后缀矢量作为后缀列表
输出:找到最长公共前缀的位置
Begin
n := size of suffVector
define longPrefix list of size n and fill all entries with 0
define suffInverse list of size n and fill all entries with 0
for all index values ‘i’ of suffVector, do
suffInverse[suffVector[i]] = i
done
k := 0
for i := 0 to n-1, do
if suffInverse[i] = n-1 then
k :=0
ignore the bottom part and go for next iteration.
j := suffVector[suffInverse[i]+1]
while (i+k)<n AND (j+k) < n and text[i+k] = text[j+k], do
increase k by 1
done
longPrefix[suffInverse[i]] := k
if k > 0 then
decrease k by 1
done
return longPrefix
End示例
#include<iostream>
#include<vector>
#include<algorithm>
using namespace std;
struct suffix {
int index;
int rank[2]; // To store rank pair
};
bool compare(suffix s1, suffix s2) { //compare suffixes for sort function
if(s1.rank[0] == s2.rank[0]) {
if(s1.rank[1] < s2.rank[1])
return true;
else
return false;
}else {
if(s1.rank[0] < s2.rank[0])
return true;
else
return false;
}
}
vector<int> buildSuffixArray(string mainString) {
int n = mainString.size();
suffix suffixArray[n];
for (int i = 0; i < n; i++) {
suffixArray[i].index = i;
suffixArray[i].rank[0] = mainString[i] - 'a'; //store old rank
suffixArray[i].rank[1] = ((i+1)<n)?(mainString[i+1]-'a'):-1; //rank after alphabetical ordering
}
sort(suffixArray, suffixArray+n, compare); //sort suffix array upto first 2 characters
int index[n]; //index in suffixArray
for (int k = 4; k < 2*n; k = k*2) { //increase k as power of 2
int currRank = 0;
int prevRank = suffixArray[0].rank[0];
suffixArray[0].rank[0] = currRank;
index[suffixArray[0].index] = 0;
for (int i = 1; i < n; i++) { // Assigning rank to all suffix
if (suffixArray[i].rank[0] == prevRank && suffixArray[i].rank[1] == suffixArray[i-1].rank[1]) {
prevRank = suffixArray[i].rank[0];
suffixArray[i].rank[0] = currRank;
} else{ //increment rank and assign
prevRank = suffixArray[i].rank[0];
suffixArray[i].rank[0] = ++currRank;
}
index[suffixArray[i].index] = i;
}
for (int i = 0; i < n; i++) { // Assign next rank to every suffix
int nextIndex = suffixArray[i].index + k/2;
suffixArray[i].rank[1] = (nextIndex < n)? suffixArray[index[nextIndex]].rank[0]: -1;
}
sort(suffixArray, suffixArray+n, compare); //sort upto first k characters
}
vector<int>suffixVector;
for (int i = 0; i < n; i++)
suffixVector.push_back(suffixArray[i].index); //index of all suffix to suffix vector
return suffixVector;
}
vector<int> kasaiAlgorithm(string mainString, vector<int> suffixVector) {
int n = suffixVector.size();
vector<int> longPrefix(n, 0); //size n and initialize with 0
vector<int> suffixInverse(n, 0);
for (int i=0; i < n; i++)
suffixInverse[suffixVector[i]] = i; //fill values of inverse Suffix list
int k = 0;
for (int i=0; i<n; i++) { //for all suffix in main string
if (suffixInverse[i] == n-1) { //when suffix at position (n-1)
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i]+1]; //nest string of suffix list
while (i+k<n && j+k<n && mainString[i+k]==mainString[j+k]) //start from kth index
k++;
longPrefix[suffixInverse[i]] = k; // prefix for the current suffix.
if (k>0)
k--; //remofe first character of string
}
return longPrefix;
}
void showArray(vector<int> vec) {
vector<int>::iterator it;
for (it = vec.begin(); it < vec.end() ; it++)
cout << *it << " ";
cout << endl;
}
int main() {
string mainString = "banana";
vector<int>suffixArray = buildSuffixArray(mainString);
int n = suffixArray.size();
cout<< "Suffix Array : "<<endl;
showArray(suffixArray);
vector<int>commonPrefix = kasaiAlgorithm(mainString, suffixArray);
cout<< "\nCommon Prefix Array : "<<endl;
showArray(commonPrefix);
}输出结果
Suffix Array : 5 3 1 0 4 2 Common Prefix Array : 1 3 0 0 2 0