Given a dictionary of millions of words, give an algorithm to find the largest possible rectangle of letters such that every row forms a word (reading left to right) and every column forms a word (reading top to bottom).
This is a very interesting problem. The book didn't give us a very clear solution. However, the approach is correct. First, group the words in the sets, the largest rectangle it can form must come the longest words, if they can form a rectangle. If they cannot, iteratively find all rectangles and compare it with the current maximum one.
To find the rectangle, I found a solution online mentioned using backtracking, however, I couldn't figure out that solution. I have an open question
here, feel free to answer it if you know.
My solution is a generalized solution from my previous
post, it is also a backtracking solution, but more like a bloody brutal one. I still use a trie to store all words in the columns. Each time I add a string into a row, check if all substrings formed by the columns can be found in the trie, if any of them cannot, return false. I also use a previous list to store all the previous substrings formed by the columns. This list will also be used to check if there are duplicates in rows and columns if the input set for rows and columns are the same.
As you see the code, it is very expensive and tedious. Finding the rectangle will take O(mn) where m is the rows of the rectangle and n is the columns of the rectangle. Group the words will take O(millions) given the problem set that there are millions of words. And iteratively find all possible rectangles will take O(millions * millions * m * n)... Don't want to even think of it.
I didn't test the code, so it may have some errors / bugs.
import java.util.*;
public class MakeRectangle {
public List largestRectangle(Set dicts){
if(dicts == null || dicts.size() == 0)
return null;
int L = getMax(dicts);
List rst = new ArrayList ();
List max = null;
int maxArea = 0;
for(int r = L; r >= 1; r--){
for(int c = L; c >= 1; c--){
Set rows = findWords(dicts, r);
if(r == c)
rst = rectangle(rows, r, rows, c);
else{
Set cols = findWords(dicts, c);
rst = rectangle(rows, r, cols, c);
}
if(rst != null){
//if the longest words can form a rectangle
//it is guaranteed to be the largest one, return the rst
if(r == L && c == L)
return rst;
//otherwise we wouldn't know which one has the largest area
//until we find all valid area
int currArea = rst.size() * rst.get(0).length();
if(max == null || maxArea < currArea){
max = new ArrayList (rst);
maxArea = currArea;
}
}
}
}
return rst;
}
private int getMax(Set dicts){
int length = 0;
for(String s : dicts)
length = Math.max(length, s.length());
return length;
}
private Set findWords(Set dicts, int length){
Set rst = new HashSet ();
for(String s : dicts){
if(s.length() == length)
rst.add(s);
}
return rst;
}
//m: length of each word in cols
//n: length of each word in rows
public List rectangle(Set rows, int n, Set cols, int m){
//since we need to build a rectangle, if number of words in rows
//is smaller than the length of the words in cols, then we will not have
//have enough words to build the rectangle, return null
if(rows.size() < m || cols.size() < n)
return null;
List matrix = new ArrayList (m);
Trie columns = new Trie();
columns.add(cols);
List previous = new ArrayList (n);
for(int i = 0; i < n; i++)
previous.add("");
if(build(rows, columns, matrix, 0, previous, m))
return matrix;
return null;
}
private boolean build(Set rows, Trie columns, List matrix, int row, List previous, int m){
if(row == m){
//for the condition where rows and columns are from the same set, we need to avoid duplicates in rows and cols)
if(previous.get(0).length() == m){
for(String s : previous){
if(matrix.contains(s))
return false;
}
}
return true;
}
for(String s : rows){
if(matrix.contains(s))
continue;
boolean valid = true;
int indexS = 0;
int indexL = 0;
while(indexS < s.length() && indexL < previous.size() && valid){
String tmp = previous.get(indexL++) + s.charAt(indexS++);
if(!columns.contains(tmp)){
valid = false;
break;
}
}
if(!valid)
continue;
matrix.add(s);
indexS = 0;
indexL = 0;
while(indexS < s.length() && indexL < previous.size()){
previous.set(indexL, previous.get(indexL) + s.charAt(indexS));
indexL++;
indexS++;
}
if(build(rows, columns, matrix, row + 1, previous, m))
return true;
matrix.remove(matrix.size() - 1);
indexL = 0;
while(indexL < previous.size()){
String tmp = previous.get(indexL);
previous.set(indexL, tmp.substring(0, tmp.length() - 1));
}
}
return false;
}
}
import java.util.*;
public class Trie {
protected final Map children;
protected String value;
protected boolean isWord = false;
private Trie(String value){
this.value = value;
children = new HashMap();
}
public Trie(){
this("");
}
public void add(String word){
if(word == null || word.length() == 0){
System.out.println("Cannot add null or empty string!");
return;
}
Trie node = this;
for(char c : word.toCharArray()){
if(!node.children.containsKey(c)){
node.put(c);
}
node = node.children.get(c);
}
isWord = true;
}
private void put(char c){
children.put(c, new Trie(this.value + c));
}
public void add(Collection dict){
for(String word : dict)
add(word);
}
public boolean contains(String s){
Trie node = this;
for(char c : s.toCharArray()){
if(!node.children.containsKey(c))
return false;
node = node.children.get(c);
}
return node.value.equals(s);
}
}