博耶摩尔算法
这是博耶摩尔算法的另一种方法。有时将其称为“良好后缀启发式”方法。对于这种情况,将创建一个预处理表作为后缀表。在此过程中,从模式的最后一个字符搜索子字符串或模式。当主字符串的子字符串与模式的子字符串匹配时,它将移动以查找匹配子字符串的其他出现。它还可以移动以找到模式的前缀,该模式是主字符串的后缀。否则,它将移动图案的整个长度。
输入输出
Input: Main String: “ABAAABCDBBABCDDEBCABC”, Pattern: “ABC” Output: Pattern found at position: 4 Pattern found at position: 10 Pattern found at position: 18
算法
fullSuffixMatch(shiftArray,borderArray,pattern)
输入-数组以存储班次位置,边框数组和要搜索的图案。
输出-填充shift数组和border数组。
Begin
n := pattern length
j := n
j := n+1
borderArray[i] := j
while i > 0, do
while j <= n AND pattern[i-1] ≠ pattern[j-1], do
if shiftArray[j] = 0, then
shiftArray[j] := j-i;
j := borderArray[j];
done
decrease i and j by 1
borderArray[i] := j
done
EndpartialSuffixMatch(shiftArray,borderArray,pattern)
输入-用于存储班次位置的数组,边界数组和要搜索的样式。
输出-填充shift数组和border数组。
Begin
n := pattern length
j := borderArray[0]
for index of all characters ‘i’ of pattern, do
if shiftArray[i] = 0, then
shiftArray[i] := j
if i = j then
j := borderArray[j]
done
EndsearchPattern(文字,图案)
输入: 将要搜索的主要文本和样式
输出- 找到模式的索引
Begin
patLen := pattern length
strLen := text size
for all entries of shiftArray, do
set all entries to 0
done
call fullSuffixMatch(shiftArray, borderArray, pattern)
call partialSuffixMatch(shiftArray, borderArray, pattern)
shift := 0
while shift <= (strLen - patLen), do
j := patLen -1
while j >= 0 and pattern[j] = text[shift + j], do
decrease j by 1
done
if j < 0, then
print the shift as, there is a match
shift := shift + shiftArray[0]
else
shift := shift + shiftArray[j+1]
done
End示例
#include<iostream>
using namespace std;
void fullSuffixMatch(int shiftArr[], int borderArr[], string pattern) {
int n = pattern.size(); //find length of pattern
int i = n;
int j = n+1;
borderArr[i] = j;
while(i > 0) {
//当第(i-1)和第(j-1)项不相同时向右搜索
while(j <= n && pattern[i-1] != pattern[j-1] ) {
if(shiftArr[j] == 0)
shiftArr[j] = j-i; //shift pattern from i to j
j = borderArr[j]; //update border
}
i--;
j--;
borderArr[i] = j;
}
}
void partialSuffixMatch(int shiftArr[], int borderArr[], string pattern) {
int n = pattern.size(); //find length of pattern
int j;
j = borderArr[0];
for(int i = 0; i<n; i++) {
if(shiftArr[i] == 0)
shiftArr[i] = j; //when shift is 0, set shift to border value
if(i == j)
j = borderArr[j]; //update border value
}
}
void searchPattern(string mainString, string pattern, int array[], int *index) {
int patLen = pattern.size();
int strLen = mainString.size();
int borderArray[patLen+1];
int shiftArray[patLen + 1];
for(int i = 0; i<=patLen; i++) {
shiftArray[i] = 0; //set all shift array to 0
}
fullSuffixMatch(shiftArray, borderArray, pattern);
partialSuffixMatch(shiftArray, borderArray, pattern);
int shift = 0;
while(shift <= (strLen - patLen)) {
int j = patLen - 1;
while(j >= 0 && pattern[j] == mainString[shift+j]) {
j--; //reduce j when pattern and main string character is matching
}
if(j < 0) {
(*index)++;
array[(*index)] = shift;
shift += shiftArray[0];
}else {
shift += shiftArray[j+1];
}
}
}
int main() {
string mainString = "ABAAABCDBBABCDDEBCABC";
string pattern = "ABC";
int locArray[mainString.size()];
int index = -1;
searchPattern(mainString, pattern, locArray, &index);
for(int i = 0; i <= index; i++) {
cout << "Pattern found at position: " << locArray[i]<<endl;
}
}输出结果
Pattern found at position: 4 Pattern found at position: 10 Pattern found at position: 18