[后缀数组]字符串k次最长子串问题相关

本篇blog主要求在字符串中出现k次的最长字串的相关问题。

首先知道后缀数组的性质:

设rk[i] < rk[j],有LCP(suffix(i),suffix(j)) = min{height[rk[i]],height[rk[i] + 1],…,height[rk[j - 1]],height[rk[j]]}

然后可以大力搞事。


Q1 求字符串中最长的至少重复两次,且可重叠的子串的长度。

首先,两个重叠字串最长长度等同于两个suffix的LCP的最大长度。

LCP长度一定是一段区间的height最大值,小于等于height的max。

Q2 求字符串中最长的至少重复两次,且不可重叠的子串的长度。

把问题转换为判定性问题。

二分长度L,把height按是否大于L分组。

找到位置的max和min看是否差值>L。

Q3 求字符串中最长的至少重复k次,且可重叠的子串的长度。

仍然是判定性问题。

二分长度L对height分组。

看是否存在区间长度>=k的。

Q4 有n个给定字符串,求最长的字符串,使其是至少一半的字符串的字串。

把字符串用其他不同字符作间隔拼成一个串,可以把问题转化成求字符串中最长的至少重复n/2次,且可重叠的子串的长度。

Code

Q2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cctype>
const int N = 20020,D = 100,C = 220;
inline int getint()
{
int r = 0,s = 1;char c = getchar();for(;!isdigit(c);c = getchar()) if(c == '-') s = 0;
for(;isdigit(c);c = getchar()) r = (((r << 2) + r) << 1) + (c ^ '0');return s ? r : -r;
}
struct SuffixArray
{
int x[N],y[N],sa[N],rk[N],buc[N],height[N];
void build(int *S,int n)
{
memset(buc,0,sizeof(buc));int m = C;
for(int i = 1;i <= n;i++) buc[x[i] = S[i]]++;
for(int i = 2;i <= m;i++) buc[i] += buc[i - 1];
for(int i = n;i;i--) sa[buc[x[i]]--] = i;
for(int k = 1;k <= n;k <<= 1)
{
int num = 0;
for(int i = n - k + 1;i <= n;i++) y[++num] = i;
for(int i = 1;i <= n;i++) if(sa[i] > k) y[++num] = sa[i] - k;
memset(buc,0,sizeof(buc));
for(int i = 1;i <= n;i++) ++buc[x[i]];
for(int i = 2;i <= m;i++) buc[i] += buc[i - 1];
for(int i = n;i;i--) sa[buc[x[y[i]]]--] = y[i],y[i] = 0;
std::swap(x,y);
x[sa[1]] = 1;num = 1;
for(int i = 2;i <= n;i++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++num;
if(num == n) break;
m = num;
}
}
void build_height(int *S,int n)
{
int k = 0;
for(int i = 1;i <= n;i++) rk[sa[i]] = i;
for(int i = 1;i <= n;i++)
{
if(rk[i] == 1) continue;
if(k) k--;
int j = sa[rk[i] - 1];
while(i + k <= n && j + k <= n && S[i + k] == S[j + k]) k++;
height[rk[i]] = k;
}
}
}SA;
int S[N];int n;
inline bool judge(int k)
{
int mx = SA.sa[1],mn = SA.sa[1];
for(int i = 2;i <= n;i++)
{
if(SA.height[i] >= k && i < n)
{
mx = std::max(SA.sa[i],mx);
mn = std::min(SA.sa[i],mn);
continue;
}
if(mx - mn > k) return 1;
mx = mn = SA.sa[i];
}
return 0;
}
int main()
{
while(n = getint())
{
for(int i = 1;i <= n;i++) S[i] = getint();
if(n <= 4){puts("0");return 0;}
for(int i = 1;i < n;i++) S[i] = S[i + 1] + D - S[i];
S[n--] = 0;
SA.build(S,n);
SA.build_height(S,n);
int l = 4,r = n,ans = 0;
while(l < r)
{
int mid = (l + r) >> 1;
if(judge(mid))
{
ans = mid;
l = mid + 1;
}
else r = mid - 1;
}
ans++;
printf("%d\n",ans < 5 ? 0 : ans);
}
return 0;
}

Q3:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cctype>
#include <set>
const int N = 20020,C = 1000001;
inline int getint()
{
int r = 0,s = 1;char c = getchar();for(;!isdigit(c);c = getchar()) if(c == '-') s = 0;
for(;isdigit(c);c = getchar()) r = (((r << 2) + r) << 1) + (c ^ '0');return s ? r : -r;
}
struct SuffixArray
{
int x[N],y[N],sa[N],rk[N],buc[C + 20],height[N];
void build(int *S,int n)
{
memset(buc,0,sizeof(buc));int m = C;
for(int i = 1;i <= n;i++) buc[x[i] = S[i]]++;
for(int i = 2;i <= m;i++) buc[i] += buc[i - 1];
for(int i = n;i;i--) sa[buc[x[i]]--] = i;
for(int k = 1;k <= n;k <<= 1)
{
int num = 0;
for(int i = n - k + 1;i <= n;i++) y[++num] = i;
for(int i = 1;i <= n;i++) if(sa[i] > k) y[++num] = sa[i] - k;
memset(buc,0,sizeof(buc));
for(int i = 1;i <= n;i++) ++buc[x[i]];
for(int i = 2;i <= m;i++) buc[i] += buc[i - 1];
for(int i = n;i;i--) sa[buc[x[y[i]]]--] = y[i],y[i] = 0;
std::swap(x,y);
x[sa[1]] = 1;num = 1;
for(int i = 2;i <= n;i++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++num;
if(num == n) break;
m = num;
}
}
void build_height(int *S,int n)
{
int k = 0;
for(int i = 1;i <= n;i++) rk[sa[i]] = i;
for(int i = 1;i <= n;i++)
{
if(rk[i] == 1) continue;
if(k) k--;
int j = sa[rk[i] - 1];
while(i + k <= n && j + k <= n && S[i + k] == S[j + k]) k++;
height[rk[i]] = k;
}
}
}SA;
int S[N];int n,k;
inline bool judge(int m)
{
int cnt = 0;
for(int i = 1;i <= n;i++)
{
if(SA.height[i] >= m) cnt++;
else cnt = 0;
if(cnt >= k - 1) return 1;
}
return 0;
}
int main()
{
n = getint();k = getint();
for(int i = 1;i <= n;i++) S[i] = getint() + 1;
SA.build(S,n);
SA.build_height(S,n);
int l = 1,r = n,ans = 1;
while(l <= r)
{
int mid = (l + r) >> 1;
if(judge(mid)) l = mid + 1,ans = mid;
else r = mid - 1;
}
printf("%d\n",ans);
return 0;
}

Q4:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cctype>
#include <set>
const int N = 110010,D = 30,C = 220;
inline int getint()
{
int r = 0,s = 1;char c = getchar();for(;!isdigit(c);c = getchar()) if(c == '-') s = 0;
for(;isdigit(c);c = getchar()) r = (((r << 2) + r) << 1) + (c ^ '0');return s ? r : -r;
}
struct SuffixArray
{
int x[N],y[N],sa[N],rk[N],buc[N],height[N];
void build(int *S,int n)
{
memset(buc,0,sizeof(buc));int m = C;
for(int i = 1;i <= n;i++) buc[x[i] = S[i]]++;
for(int i = 2;i <= m;i++) buc[i] += buc[i - 1];
for(int i = n;i;i--) sa[buc[x[i]]--] = i;
for(int k = 1;k <= n;k <<= 1)
{
int num = 0;
for(int i = n - k + 1;i <= n;i++) y[++num] = i;
for(int i = 1;i <= n;i++) if(sa[i] > k) y[++num] = sa[i] - k;
memset(buc,0,sizeof(buc));
for(int i = 1;i <= n;i++) ++buc[x[i]];
for(int i = 2;i <= m;i++) buc[i] += buc[i - 1];
for(int i = n;i;i--) sa[buc[x[y[i]]]--] = y[i],y[i] = 0;
std::swap(x,y);
x[sa[1]] = 1;num = 1;
for(int i = 2;i <= n;i++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++num;
if(num == n) break;
m = num;
}
}
void build_height(int *S,int n)
{
int k = 0;
for(int i = 1;i <= n;i++) rk[sa[i]] = i;
for(int i = 1;i <= n;i++)
{
if(rk[i] == 1) continue;
if(k) k--;
int j = sa[rk[i] - 1];
while(i + k <= n && j + k <= n && S[i + k] == S[j + k]) k++;
height[rk[i]] = k;
}
}
}SA;
int S[N];int n,len;char buf[N];int idx[N];
inline bool judge(int k,int p)
{
std::set<int> vis;
vis.insert(idx[SA.sa[1]]);
for(int i = 2;i <= len;i++)
{
if(SA.height[i] >= k && i < len)
{
vis.insert(idx[SA.sa[i]]);
continue;
}
if(2 * vis.size() > n)
{
if(!p) return 1;
for(int j = 0;j < k;j++) printf("%c",S[SA.sa[i - 1] + j] + 'a' - 1);puts("");
}
vis.clear();
vis.insert(idx[SA.sa[i]]);
}
return 0;
}
int main()
{
int f = 0;
while(n = getint())
{
if(f) puts("");else f = 1;
if(n == 1) {scanf("%s",buf);printf("%s\n",buf);continue;}
int l = 1,r = 1;len = 0;
for(int i = 1;i <= n;i++)
{
scanf("%s",buf);int ll = strlen(buf);r = std::max(ll + 1,r);
for(int j = 0;j < ll;j++) S[++len] = buf[j] - 'a' + 1,idx[len] = i;
S[++len] = i + D;idx[len] = 0;
}
SA.build(S,len);
SA.build_height(S,len);
if(!judge(1,0)) {puts("?");continue;}
while(l < r)
{
int mid = (l + r) >> 1;
if(judge(mid,0)) l = mid + 1;
else r = mid;
}
judge(l - 1,1);
}
return 0;
}