C ++, cuadrícula de 27243 caracteres (248x219, relleno 50.2%)
(Publicar esto como una nueva respuesta porque me gustaría mantener el 1D encuadernado que originalmente publiqué como referencia)
Esta estafa descaradamente está fuertemente inspirada en la respuesta de @ AndersKaseorg en su estructura principal, pero tiene un par de ajustes. Primero, uso mi programa original para fusionar cadenas hasta que la mejor superposición disponible sea de solo 3 caracteres. Luego uso el método que AndersKaseorg describe para llenar progresivamente una cuadrícula 2D utilizando estas cadenas generadas. Las restricciones también son un poco diferentes: todavía intenta agregar la menor cantidad de caracteres cada vez, pero los lazos se rompen al favorecer primero las cuadrículas cuadradas, luego las cuadrículas pequeñas y, finalmente, al agregar la palabra más larga.
El comportamiento que muestra es alternar entre períodos de llenado de espacio y expansión rápida de la cuadrícula (desafortunadamente se quedó sin palabras justo después de una etapa de expansión rápida, por lo que hay mucho espacio en blanco alrededor de los bordes). Sospecho que con algunos ajustes de la función de costo se podría lograr que supere el 50% de espacio.
Aquí hay 2 ejecutables (para evitar la necesidad de volver a ejecutar todo el proceso al mejorar iterativamente el algoritmo). La salida de uno se puede canalizar directamente al otro:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cstdlib>
std::size_t calcOverlap(const std::string &a, const std::string &b, std::size_t limit, std::size_t minimal) {
std::size_t la = a.size();
for(std::size_t p = std::min(std::min(la, b.size()), limit + 1); -- p > minimal; ) {
if(a.compare(la - p, p, b, 0, p) == 0) {
return p;
}
}
return 0;
}
bool isSameReversed(const std::string &a, const std::string &b) {
std::size_t l = a.size();
if(b.size() != l) {
return false;
}
for(std::size_t i = 0; i < l; ++ i) {
if(a[i] != b[l-i-1]) {
return false;
}
}
return true;
}
int main(int argc, const char *const *argv) {
// Usage: prog [<stop_threshold>]
std::size_t stopThreshold = 3;
if(argc >= 2) {
char *check;
long v = std::strtol(argv[1], &check, 10);
if(check == argv[1] || v < 0) {
std::cerr
<< "Invalid stop threshold. Should be an integer >= 0"
<< std::endl;
return 1;
}
stopThreshold = v;
}
std::vector<std::string> words;
// Load all words from input and their reverses (words can be backwards now)
while(true) {
std::string word;
std::getline(std::cin, word);
if(word.empty()) {
break;
}
words.push_back(word);
std::reverse(word.begin(), word.end());
words.push_back(std::move(word));
}
std::cerr
<< "Input word count: " << words.size() << std::endl;
// Remove all fully subsumed words
for(auto p = words.begin(); p != words.end(); ) {
bool subsumed = false;
for(auto i = words.begin(); i != words.end(); ++ i) {
if(i == p) {
continue;
}
if(i->find(*p) != std::string::npos) {
subsumed = true;
break;
}
}
if(subsumed) {
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "After subsuming checks: " << words.size()
<< std::endl;
// Sort words longest-to-shortest (not necessary but doesn't hurt. Makes finding maxlen a tiny bit easier)
std::sort(words.begin(), words.end(), [](const std::string &a, const std::string &b) {
return a.size() > b.size();
});
std::size_t maxlen = words.front().size();
// Repeatedly combine most-compatible words until we reach the threshold
std::size_t bestPossible = maxlen - 1;
while(words.size() > 2) {
auto bestA = words.begin();
auto bestB = -- words.end();
std::size_t bestOverlap = 0;
for(auto p = ++ words.begin(), e = words.end(); p != e; ++ p) {
if(p->size() - 1 <= bestOverlap) {
continue;
}
for(auto q = words.begin(); q != p; ++ q) {
std::size_t overlap = calcOverlap(*p, *q, bestPossible, bestOverlap);
if(overlap > bestOverlap && !isSameReversed(*p, *q)) {
bestA = p;
bestB = q;
bestOverlap = overlap;
}
overlap = calcOverlap(*q, *p, bestPossible, bestOverlap);
if(overlap > bestOverlap && !isSameReversed(*p, *q)) {
bestA = q;
bestB = p;
bestOverlap = overlap;
}
}
if(bestOverlap == bestPossible) {
break;
}
}
if(bestOverlap <= stopThreshold) {
break;
}
std::string newStr = std::move(*bestA);
newStr.append(*bestB, bestOverlap, std::string::npos);
if(bestA == -- words.end()) {
words.pop_back();
*bestB = std::move(words.back());
words.pop_back();
} else {
*bestB = std::move(words.back());
words.pop_back();
*bestA = std::move(words.back());
words.pop_back();
}
// Remove any words which are now in the result (forward or reverse)
// (would not be necessary if we didn't have the reversed forms too)
std::string newRev = newStr;
std::reverse(newRev.begin(), newRev.end());
for(auto p = words.begin(); p != words.end(); ) {
if(newStr.find(*p) != std::string::npos || newRev.find(*p) != std::string::npos) {
std::cerr << "Now subsumes: " << *p << std::endl;
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "Words remaining: " << (words.size() + 1)
<< " Latest combination: (" << bestOverlap << ") " << newStr
<< std::endl;
words.push_back(std::move(newStr));
words.push_back(std::move(newRev));
bestPossible = bestOverlap; // Merging existing words will never make longer merges possible
}
std::cerr
<< "After merging: " << words.size()
<< std::endl;
// Remove all fully subsumed words (i.e. reversed words)
for(auto p = words.begin(); p != words.end(); ) {
bool subsumed = false;
std::string rev = *p;
std::reverse(rev.begin(), rev.end());
for(auto i = words.begin(); i != words.end(); ++ i) {
if(i == p) {
continue;
}
if(i->find(*p) != std::string::npos || i->find(rev) != std::string::npos) {
subsumed = true;
break;
}
}
if(subsumed) {
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "After subsuming: " << words.size()
<< std::endl;
// Sort words longest-to-shortest for display
std::sort(words.begin(), words.end(), [](const std::string &a, const std::string &b) {
return a.size() > b.size();
});
std::size_t len = 0;
for(const auto &word : words) {
std::cout
<< word
<< std::endl;
len += word.size();
}
std::cerr
<< "Total size: " << len
<< std::endl;
return 0;
}
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <limits>
class vec2 {
public:
int x;
int y;
vec2(void) : x(0), y(0) {};
vec2(int x, int y) : x(x), y(y) {}
bool operator ==(const vec2 &b) const {
return x == b.x && y == b.y;
}
vec2 &operator +=(const vec2 &b) {
x += b.x;
y += b.y;
return *this;
}
vec2 &operator -=(const vec2 &b) {
x -= b.x;
y -= b.y;
return *this;
}
vec2 operator +(const vec2 b) const {
return vec2(x + b.x, y + b.y);
}
vec2 operator *(const int b) const {
return vec2(x * b, y * b);
}
};
class box2 {
public:
vec2 tl;
vec2 br;
box2(void) : tl(), br() {};
box2(vec2 a, vec2 b)
: tl(std::min(a.x, b.x), std::min(a.y, b.y))
, br(std::max(a.x, b.x) + 1, std::max(a.y, b.y) + 1)
{}
void grow(const box2 &b) {
if(b.tl.x < tl.x) {
tl.x = b.tl.x;
}
if(b.br.x > br.x) {
br.x = b.br.x;
}
if(b.tl.y < tl.y) {
tl.y = b.tl.y;
}
if(b.br.y > br.y) {
br.y = b.br.y;
}
}
bool intersects(const box2 &b) const {
return (
((tl.x >= b.br.x) != (br.x > b.tl.x)) &&
((tl.y >= b.br.y) != (br.y > b.tl.y))
);
}
box2 &operator +=(const vec2 b) {
tl += b;
br += b;
return *this;
}
int width(void) const {
return br.x - tl.x;
}
int height(void) const {
return br.y - tl.y;
}
int maxdim(void) const {
return std::max(width(), height());
}
};
template <> struct std::hash<vec2> {
std::size_t operator ()(const vec2 &o) const {
return std::hash<int>()(o.x) + std::hash<int>()(o.y) * 997;
}
};
template <class A,class B> struct std::hash<std::pair<A,B>> {
std::size_t operator ()(const std::pair<A,B> &o) const {
return std::hash<A>()(o.first) + std::hash<B>()(o.second) * 31;
}
};
class word_placement {
public:
vec2 start;
vec2 dir;
box2 bounds;
const std::string *word;
word_placement(vec2 start, vec2 dir, const std::string *word)
: start(start)
, dir(dir)
, bounds(start, start + dir * (word->size() - 1))
, word(word)
{}
word_placement(vec2 start, const word_placement ©)
: start(copy.start + start)
, dir(copy.dir)
, bounds(copy.bounds)
, word(copy.word)
{
bounds += start;
}
word_placement(const word_placement ©)
: start(copy.start)
, dir(copy.dir)
, bounds(copy.bounds)
, word(copy.word)
{}
};
class word_placement_links {
public:
std::unordered_set<word_placement*> placements;
std::unordered_set<std::pair<char,word_placement*>> relativePlacements;
};
class grid {
public:
std::vector<std::string> wordCache; // Just a block of memory for our pointers to reference
std::unordered_map<vec2,char> state;
std::unordered_set<word_placement*> placements;
std::unordered_map<const std::string*,word_placement_links> wordPlacements;
std::unordered_map<char,std::unordered_set<word_placement*>> relativeWordPlacements;
box2 bound;
grid(const std::vector<std::string> &words) {
wordCache = words;
std::vector<vec2> directions;
directions.emplace_back(+1, 0);
directions.emplace_back(+1, +1);
directions.emplace_back( 0, +1);
directions.emplace_back(-1, +1);
directions.emplace_back(-1, 0);
directions.emplace_back(-1, -1);
directions.emplace_back( 0, -1);
directions.emplace_back(+1, -1);
wordPlacements.reserve(wordCache.size());
placements.reserve(wordCache.size());
relativeWordPlacements.reserve(64);
std::size_t total = 0;
for(const std::string &word : wordCache) {
word_placement_links &p = wordPlacements[&word];
p.placements.reserve(8);
auto &rp = p.relativePlacements;
std::size_t l = word.size();
rp.reserve(l * directions.size());
for(int i = 0; i < l; ++ i) {
for(const vec2 &d : directions) {
word_placement *rwp = new word_placement(d * -i, d, &word);
rp.emplace(word[i], rwp);
relativeWordPlacements[word[i]].insert(rwp);
}
}
total += l;
}
state.reserve(total);
}
const std::string *find_word(const std::string &word) const {
for(const std::string &w : wordCache) {
if(w == word) {
return &w;
}
}
throw std::string("Failed to find word in cache");
}
void remove_word(const std::string *word) {
const word_placement_links &links = wordPlacements[word];
for(word_placement *p : links.placements) {
placements.erase(p);
delete p;
}
for(auto &p : links.relativePlacements) {
relativeWordPlacements[p.first].erase(p.second);
delete p.second;
}
wordPlacements.erase(word);
}
void remove_placement(word_placement *placement) {
wordPlacements[placement->word].placements.erase(placement);
placements.erase(placement);
delete placement;
}
bool check_placement(const word_placement &placement) const {
vec2 p = placement.start;
for(const char c : *placement.word) {
auto i = state.find(p);
if(i != state.end() && i->second != c) {
return false;
}
p += placement.dir;
}
return true;
}
int check_new(const word_placement &placement) const {
int n = 0;
vec2 p = placement.start;
for(const char c : *placement.word) {
n += !state.count(p);
p += placement.dir;
}
return n;
}
void check_placements(const box2 &b) {
for(auto i = placements.begin(); i != placements.end(); ) {
if(!b.intersects((*i)->bounds) || check_placement(**i)) {
++ i;
} else {
i = placements.erase(i);
}
}
}
void add_placement(const vec2 p, const word_placement &relative) {
word_placement check(p, relative);
if(check_placement(check)) {
word_placement *wp = new word_placement(check);
placements.insert(wp);
wordPlacements[relative.word].placements.insert(wp);
}
}
void place(word_placement placement) {
remove_word(placement.word);
int overlap = 0;
for(const char c : *placement.word) {
char &g = state[placement.start];
if(g == '\0') {
g = c;
for(const word_placement *rp : relativeWordPlacements[c]) {
add_placement(placement.start, *rp);
}
} else if(g != c) {
throw std::string("New word changes an existing character!");
} else {
++ overlap;
}
placement.start += placement.dir;
}
bound.grow(placement.bounds);
check_placements(placement.bounds);
std::cerr
<< draw('.', "\n")
<< "Added " << *placement.word << " (overlap: " << overlap << ")"
<< ", Grid: " << bound.width() << "x" << bound.height() << " of " << state.size() << " chars"
<< ", Words remaining: " << wordPlacements.size()
<< std::endl;
}
int check_cost(box2 b) const {
b.grow(bound);
return (
((b.maxdim() - bound.maxdim()) << 16) |
(b.width() + b.height() - bound.width() - bound.height())
);
}
void add_next(void) {
int bestNew = std::numeric_limits<int>::max();
int bestCost = std::numeric_limits<int>::max();
int bestLen = 0;
word_placement *best = nullptr;
for(word_placement *p : placements) {
int n = check_new(*p);
if(n <= bestNew) {
int l = p->word->size();
int cost = check_cost(box2(p->start, p->start + p->dir * l));
if(n < bestNew || cost < bestCost || (cost == bestCost && l < bestLen)) {
bestNew = n;
bestCost = cost;
bestLen = l;
best = p;
}
}
}
if(best == nullptr) {
throw std::string("Failed to find join to existing blob");
}
place(*best);
}
void fill(void) {
while(!placements.empty()) {
add_next();
}
}
std::string draw(char blank, const std::string &linesep) const {
std::string result;
result.reserve((bound.width() + linesep.size()) * bound.height());
for(int y = bound.tl.y; y < bound.br.y; ++ y) {
for(int x = bound.tl.x; x < bound.br.x; ++ x) {
auto c = state.find(vec2(x, y));
result.push_back((c == state.end()) ? blank : c->second);
}
result.append(linesep);
}
return result;
}
box2 bounds(void) const {
return bound;
}
int chars(void) const {
return state.size();
}
};
int main(int argc, const char *const *argv) {
std::vector<std::string> words;
// Load all words from input
while(true) {
std::string word;
std::getline(std::cin, word);
if(word.empty()) {
break;
}
words.push_back(std::move(word));
}
std::cerr
<< "Input word count: " << words.size() << std::endl;
// initialise grid
grid g(words);
// add first word (order of input file means this is longest word)
g.place(word_placement(vec2(0, 0), vec2(1, 0), g.find_word(words.front())));
// add all other words
g.fill();
std::cout << g.draw('.', "\n");
int w = g.bounds().width();
int h = g.bounds().height();
int n = g.chars();
std::cerr
<< "Final grid: " << w << "x" << h
<< " with " << n << " characters"
<< " (" << (n * 100.0 / (w * h)) << "% filled)"
<< std::endl;
return 0;
}
Y finalmente, el resultado:
Resultado alternativo (después de corregir un par de errores en el programa que sesgaban ciertas direcciones y ajustaban la función de costo, obtuve una solución más compacta pero menos óptima): 29275 caracteres, 198x195 (75.8% de relleno):
Nuevamente, no he hecho mucho para optimizar estos programas, por lo que lleva un tiempo. Pero puedes ver cómo se llena la cuadrícula, lo cual es bastante hipnótico.