Aquí hay una forma de obtener el factor de registro. Sea dp[i][j]
verdad si podemos alcanzar la subcadena s[i..j]
. Entonces:
dp[0][length(s)-1] ->
true
dp[0][j] ->
if s[0] != s[j+1]:
false
else:
true if any dp[0][k]
for j < k ≤ (j + longestMatchRight[0][j+1])
(The longest match we can use is
also bound by the current range.)
(Initialise left side similarly.)
Ahora itera desde afuera en:
for i = 1 to length(s)-2:
for j = length(s)-2 to i:
dp[i][j] ->
// We removed on the right
if s[i] != s[j+1]:
false
else:
true if any dp[i][k]
for j < k ≤ (j + longestMatchRight[i][j+1])
// We removed on the left
if s[i-1] != s[j]:
true if dp[i][j]
else:
true if any dp[k][j]
for (i - longestMatchLeft[i-1][j]) ≤ k < i
Podemos calcular previamente el partido más largo de cada par a partir (i, j)
de O(n^2)
la recurrencia,
longest(i, j) ->
if s[i] == s[j]:
return 1 + longest(i + 1, j + 1)
else:
return 0
Esto nos permitiría verificar una coincidencia de subcadena que comience en los índices i
y j
en O(1)
. (Necesitamos ambas direcciones, derecha e izquierda).
Cómo obtener el factor de registro
Podemos pensar en una forma de crear una estructura de datos que nos permita determinar si
any dp[i][k]
for j < k ≤ (j + longestMatchRight[i][j+1])
(And similarly for the left side.)
en O(log n)
, considerando que ya hemos visto esos valores.
Aquí está el código C ++ con árboles de segmentos (para consultas derecha e izquierda, por lo tanto O(n^2 * log n)
) que incluye el generador de prueba de Bananon. Para 5000 caracteres "a", se ejecutó en 3.54s, 420 MB ( https://ideone.com/EIrhnR ). Para reducir la memoria, uno de los árboles de segmentos se implementa en una sola fila (todavía necesito investigar hacer lo mismo con las consultas del lado izquierdo para reducir aún más la memoria).
#include <iostream>
#include <string>
#include <ctime>
#include <random>
#include <algorithm> // std::min
using namespace std;
const int MAX_N = 5000;
int seg[2 * MAX_N];
int segsL[MAX_N][2 * MAX_N];
int m[MAX_N][MAX_N][2];
int dp[MAX_N][MAX_N];
int best;
// Adapted from https://codeforces.com/blog/entry/18051
void update(int n, int p, int value) { // set value at position p
for (seg[p += n] = value; p > 1; p >>= 1)
seg[p >> 1] = seg[p] + seg[p ^ 1];
}
// Adapted from https://codeforces.com/blog/entry/18051
int query(int n, int l, int r) { // sum on interval [l, r)
int res = 0;
for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
if (l & 1) res += seg[l++];
if (r & 1) res += seg[--r];
}
return res;
}
// Adapted from https://codeforces.com/blog/entry/18051
void updateL(int n, int i, int p, int value) { // set value at position p
for (segsL[i][p += n] = value; p > 1; p >>= 1)
segsL[i][p >> 1] = segsL[i][p] + segsL[i][p ^ 1];
}
// Adapted from https://codeforces.com/blog/entry/18051
int queryL(int n, int i, int l, int r) { // sum on interval [l, r)
int res = 0;
for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
if (l & 1) res += segsL[i][l++];
if (r & 1) res += segsL[i][--r];
}
return res;
}
// Code by גלעד ברקן
void precalc(int n, string & s) {
int i, j;
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
// [longest match left, longest match right]
m[i][j][0] = (s[i] == s[j]) & 1;
m[i][j][1] = (s[i] == s[j]) & 1;
}
}
for (i = n - 2; i >= 0; i--)
for (j = n - 2; j >= 0; j--)
m[i][j][1] = s[i] == s[j] ? 1 + m[i + 1][j + 1][1] : 0;
for (i = 1; i < n; i++)
for (j = 1; j < n; j++)
m[i][j][0] = s[i] == s[j] ? 1 + m[i - 1][j - 1][0] : 0;
}
// Code by גלעד ברקן
void f(int n, string & s) {
int i, j, k, longest;
dp[0][n - 1] = 1;
update(n, n - 1, 1);
updateL(n, n - 1, 0, 1);
// Right side initialisation
for (j = n - 2; j >= 0; j--) {
if (s[0] == s[j + 1]) {
longest = std::min(j + 1, m[0][j + 1][1]);
for (k = j + 1; k <= j + longest; k++)
dp[0][j] |= dp[0][k];
if (dp[0][j]) {
update(n, j, 1);
updateL(n, j, 0, 1);
best = std::min(best, j + 1);
}
}
}
// Left side initialisation
for (i = 1; i < n; i++) {
if (s[i - 1] == s[n - 1]) {
// We are bound by the current range
longest = std::min(n - i, m[i - 1][n - 1][0]);
for (k = i - 1; k >= i - longest; k--)
dp[i][n - 1] |= dp[k][n - 1];
if (dp[i][n - 1]) {
updateL(n, n - 1, i, 1);
best = std::min(best, n - i);
}
}
}
for (i = 1; i <= n - 2; i++) {
for (int ii = 0; ii < MAX_N; ii++) {
seg[ii * 2] = 0;
seg[ii * 2 + 1] = 0;
}
update(n, n - 1, dp[i][n - 1]);
for (j = n - 2; j >= i; j--) {
// We removed on the right
if (s[i] == s[j + 1]) {
// We are bound by half the current range
longest = std::min(j - i + 1, m[i][j + 1][1]);
//for (k=j+1; k<=j+longest; k++)
//dp[i][j] |= dp[i][k];
if (query(n, j + 1, j + longest + 1)) {
dp[i][j] = 1;
update(n, j, 1);
updateL(n, j, i, 1);
}
}
// We removed on the left
if (s[i - 1] == s[j]) {
// We are bound by half the current range
longest = std::min(j - i + 1, m[i - 1][j][0]);
//for (k=i-1; k>=i-longest; k--)
//dp[i][j] |= dp[k][j];
if (queryL(n, j, i - longest, i)) {
dp[i][j] = 1;
updateL(n, j, i, 1);
update(n, j, 1);
}
}
if (dp[i][j])
best = std::min(best, j - i + 1);
}
}
}
int so(string s) {
for (int i = 0; i < MAX_N; i++) {
seg[i * 2] = 0;
seg[i * 2 + 1] = 0;
for (int j = 0; j < MAX_N; j++) {
segsL[i][j * 2] = 0;
segsL[i][j * 2 + 1] = 0;
m[i][j][0] = 0;
m[i][j][1] = 0;
dp[i][j] = 0;
}
}
int n = s.length();
best = n;
precalc(n, s);
f(n, s);
return best;
}
// End code by גלעד ברקן
// Code by Bananon =======================================================================
int result;
int lps[MAX_N][MAX_N];
bool checked[MAX_N][MAX_N];
void check(int start, int length) {
checked[start][length] = true;
if (length < result) {
result = length;
}
for (int i = lps[start][length]; i != 0; i = lps[start][i - 1]) {
int newLength = length - i;
if (!checked[start][newLength])
check(start, newLength);
int newStart = start + i;
if (!checked[newStart][newLength])
check(newStart, newLength);
}
}
int my(string str) {
int n = str.length();
for (int l = 0; l < n; l++) {
int subLength = n - l;
lps[l][0] = 0;
checked[l][0] = false;
for (int i = 1; i < subLength; ++i) {
int j = lps[l][i - 1];
while (j > 0 && str[i + l] != str[j + l])
j = lps[l][j - 1];
if (str[i + l] == str[j + l]) j++;
lps[l][i] = j;
checked[l][i] = false;
}
}
result = n - 1;
check(0, n - 1);
return result + 1;
}
// generate =================================================================
bool rndBool() {
return rand() % 2 == 0;
}
int rnd(int bound) {
return rand() % bound;
}
void untrim(string & str) {
int length = rnd(str.length());
int prefixLength = rnd(str.length()) + 1;
if (rndBool())
str.append(str.substr(0, prefixLength));
else {
string newStr = str.substr(str.length() - prefixLength, prefixLength);
newStr.append(str);
str = newStr;
}
}
void rndTest(int minTestLength, string s) {
while (s.length() < minTestLength)
untrim(s);
int myAns = my(s);
int soAns = so(s);
cout << myAns << " " << soAns << '\n';
if (soAns != myAns) {
cout << s;
exit(0);
}
}
int main() {
int minTestLength;
cin >> minTestLength;
string seed;
cin >> seed;
while (true)
rndTest(minTestLength, seed);
}
Y aquí está el código JavaScript (sin la mejora del factor de registro) para mostrar que la recurrencia funciona. (Para obtener el factor de registro, reemplazamos los k
bucles internos con una consulta de rango único).
debug = 1
function precalc(s){
let m = new Array(s.length)
for (let i=0; i<s.length; i++){
m[i] = new Array(s.length)
for (let j=0; j<s.length; j++){
// [longest match left, longest match right]
m[i][j] = [(s[i] == s[j]) & 1, (s[i] == s[j]) & 1]
}
}
for (let i=s.length-2; i>=0; i--)
for (let j=s.length-2; j>=0; j--)
m[i][j][1] = s[i] == s[j] ? 1 + m[i+1][j+1][1] : 0
for (let i=1; i<s.length; i++)
for (let j=1; j<s.length; j++)
m[i][j][0] = s[i] == s[j] ? 1 + m[i-1][j-1][0] : 0
return m
}
function f(s){
m = precalc(s)
let n = s.length
let min = s.length
let dp = new Array(s.length)
for (let i=0; i<s.length; i++)
dp[i] = new Array(s.length).fill(0)
dp[0][s.length-1] = 1
// Right side initialisation
for (let j=s.length-2; j>=0; j--){
if (s[0] == s[j+1]){
let longest = Math.min(j + 1, m[0][j+1][1])
for (let k=j+1; k<=j+longest; k++)
dp[0][j] |= dp[0][k]
if (dp[0][j])
min = Math.min(min, j + 1)
}
}
// Left side initialisation
for (let i=1; i<s.length; i++){
if (s[i-1] == s[s.length-1]){
let longest = Math.min(s.length - i, m[i-1][s.length-1][0])
for (let k=i-1; k>=i-longest; k--)
dp[i][s.length-1] |= dp[k][s.length-1]
if (dp[i][s.length-1])
min = Math.min(min, s.length - i)
}
}
for (let i=1; i<=s.length-2; i++){
for (let j=s.length-2; j>=i; j--){
// We removed on the right
if (s[i] == s[j+1]){
// We are bound by half the current range
let longest = Math.min(j - i + 1, m[i][j+1][1])
for (let k=j+1; k<=j+longest; k++)
dp[i][j] |= dp[i][k]
}
// We removed on the left
if (s[i-1] == s[j]){
// We are bound by half the current range
let longest = Math.min(j - i + 1, m[i-1][j][0])
for (let k=i-1; k>=i-longest; k--)
dp[i][j] |= dp[k][j]
}
if (dp[i][j])
min = Math.min(min, j - i + 1)
}
}
if (debug){
let str = ""
for (let row of dp)
str += row + "\n"
console.log(str)
}
return min
}
function main(s){
var strs = [
"caaca",
"bbabbbba",
"baabbabaa",
"bbabbba",
"bbbabbbbba",
"abbabaabbab",
"abbabaabbaba",
"aabaabaaabaab",
"bbabbabbb"
]
for (let s of strs){
let t = new Date
console.log(s)
console.log(f(s))
//console.log((new Date - t)/1000)
console.log("")
}
}
main()