Crossword generator based on Z3

We assign an integer to each character in crossword, it reflects ASCII code of it.

Then we enumerate all possible horizontal/vertical "sticks" longer than 1 and assign words to them.

For example, there is a horizontal stick of length 3. And we have such 3-letter words in our dictionary: "the", "she", "xor".

We add the following constraint:

Or(
	And(chars[X][Y]=='t', chars[X][Y+1]=='h', chars[X][Y+2]=='e'),
	And(chars[X][Y]=='s', chars[X][Y+1]=='h', chars[X][Y+2]=='e'),
	And(chars[X][Y]=='x', chars[X][Y+1]=='o', chars[X][Y+2]=='r'))

One of these words would be choosen automatically.

Index of each word is also considered, because duplicates are not allowed.

Sample pattern:

**** **********
 * * *  * * * *
***************
 * * *  * * * *
********* *****
 * * * * * *  *
****** ********
   * * * * *   
******** ******
*  * * * * * * 
***** *********
* * * *  * * * 
***************
* * * *  * * * 
********** ****

Sample result:

spur stimulated
 r e c  i a h e
congratulations
 m u t  a i s c
violation niece
 s a e p e n  n
rector penitent
   i i o c e
accounts herald
s  n g e a r o
press edinburgh
e x e n  t p i
characteristics
t c l r  n e a
satisfying dull

horizontal:
((0, 0), (0, 3)) spur
((0, 5), (0, 14)) stimulated
((2, 0), (2, 14)) congratulations
((4, 0), (4, 8)) violation
((4, 10), (4, 14)) niece
((6, 0), (6, 5)) rector
((6, 7), (6, 14)) penitent
((8, 0), (8, 7)) accounts
((8, 9), (8, 14)) herald
((10, 0), (10, 4)) press
((10, 6), (10, 14)) edinburgh
((12, 0), (12, 14)) characteristics
((14, 0), (14, 9)) satisfying
((14, 11), (14, 14)) dull
vertical:
((8, 0), (14, 0)) aspects
((0, 1), (6, 1)) promise
((10, 2), (14, 2)) exact
((0, 3), (10, 3)) regulations
((10, 4), (14, 4)) seals
((0, 5), (9, 5)) scattering
((10, 6), (14, 6)) entry
((4, 7), (10, 7)) opposed
((0, 8), (4, 8)) milan
((5, 9), (14, 9)) enchanting
((0, 10), (4, 10)) latin
((4, 11), (14, 11)) interrupted
((0, 12), (4, 12)) those
((8, 13), (14, 13)) logical
((0, 14), (6, 14)) descent

Unsat is possible if the dictionary is too small or have no words of length present in pattern.

The source code:

#!/usr/bin/env python

from z3 import *
import sys

"""
# https://commons.wikimedia.org/wiki/File:Khachbar-1.jpg
pattern=[
"*****",
"* * *",
"* ***",
"*** *",
"* ***",
"* * *",
"*****"]
"""

"""
# https://commons.wikimedia.org/wiki/File:Khachbar-4.jpg
pattern=[
"*******",
"* * * *",
"*******",
"* * * *",
"*******",
"* * * *",
"*******"]
"""

# https://commons.wikimedia.org/wiki/File:British_crossword.svg
pattern=[
"**** **********",
" * * *  * * * *",
"***************",
" * * *  * * * *",
"********* *****",
" * * * * * *  *",
"****** ********",
"   * * * * *   ",
"******** ******",
"*  * * * * * * ",
"***** *********",
"* * * *  * * * ",
"***************",
"* * * *  * * * ",
"********** ****"]

HEIGHT=len(pattern)
WIDTH=len(pattern[0])

# scan pattern[] and find all "sticks" longer than 1 and collect its coordinates:

horizontal=[]

in_the_middle=False
for r in range(HEIGHT):
    for c in range(WIDTH):
        if pattern[r][c]=='*' and in_the_middle==False:
            in_the_middle=True
            start=(r,c)
        elif pattern[r][c]==' ' and in_the_middle==True:
            if c-start[1]>1:
                horizontal.append ((start, (r, c-1)))
            in_the_middle=False
    if in_the_middle:
        if c-start[1]>1:
            horizontal.append ((start, (r, c)))
        in_the_middle=False

vertical=[]

in_the_middle=False
for c in range(WIDTH):
    for r in range(HEIGHT):
        if pattern[r][c]=='*' and in_the_middle==False:
            in_the_middle=True
            start=(r,c)
        elif pattern[r][c]==' ' and in_the_middle==True:
            if r-start[0]>1:
                vertical.append ((start, (r-1, c)))
            in_the_middle=False
    if in_the_middle:
        if r-start[0]>1:
            vertical.append ((start, (r, c)))
        in_the_middle=False

# for the first simple pattern, we will have such coordinates of "sticks":
# horizontal=[((0, 0), (0, 4)), ((2, 2), (2, 4)), ((3, 0), (3, 2)), ((4, 2), (4, 4)), ((6, 0), (6, 4))]
# vertical=[((0, 0), (6, 0)), ((0, 2), (6, 2)), ((0, 4), (6, 4))]

# the list in this file is assumed to not have duplicates, otherwise duplicates can be present in the final resulting crossword:
with open("words.txt") as f:
    content = f.readlines()
words = [x.strip() for x in content]

# FIXME: slow, called too often
def find_words_len(l):
    rt=[]
    i=0
    for word in words:
        if len(word)==l:
            rt.append ((i, word))
        i=i+1
    return rt

# 2D array of ASCII codes of all characters:
chars=[[Int('chars_%d_%d' % (r, c)) for c in range(WIDTH)] for r in range(HEIGHT)]
# indices of horizontal words:
horizontal_idx=[Int('horizontal_idx_%d' % i) for i in range(len(horizontal))]
# indices of vertical words:
vertical_idx=[Int('vertical_idx_%d' % i) for i in range(len(vertical))]

s=Solver()

# this function takes coordinates, word length and word itself
# for "hello", it returns array like:
# [chars[0][0]==ord('h'), chars[0][1]==ord('e'), chars[0][2]==ord('l'), chars[0][3]==ord('l'), chars[0][4]==ord('o')]
def form_H_expr(r, c, l, w):
    return [chars[r][c+i]==ord(w[i]) for i in range(l)]

# now we find all horizonal "sticks", we find all possible words of corresponding length...
for i in range(len(horizontal)):
    h=horizontal[i]
    _from=h[0]
    _to=h[1]
    l=_to[1]-_from[1]+1
    list_of_ANDs=[]
    for idx, word in find_words_len(l):
        # at this point, we form expression like:
        # And(chars[0][0]==ord('h'), chars[0][1]==ord('e'), chars[0][2]==ord('l'), chars[0][3]==ord('l'), chars[0][4]==ord('o'), horizontal_idx[]==...)
        list_of_ANDs.append(And(form_H_expr(_from[0], _from[1], l, word)+[horizontal_idx[i]==idx]))
    # at this point, we form expression like:
    # Or(And(chars...==word1), And(chars...==word2), And(chars...==word3))
    s.add(Or(*list_of_ANDs))

# same for vertical "sticks":
def form_V_expr(r, c, l, w):
    return [chars[r+i][c]==ord(w[i]) for i in range(l)]

for i in range(len(vertical)):
    v=vertical[i]
    _from=v[0]
    _to=v[1]
    l=_to[0]-_from[0]+1
    list_of_ANDs=[]
    for idx, word in find_words_len(l):
        list_of_ANDs.append(And(form_V_expr(_from[0], _from[1], l, word)+[vertical_idx[i]==idx]))
    s.add(Or(*list_of_ANDs))

# we collected indices of horizonal/vertical words to make sure they will not be duplicated on resulting crossword:
s.add(Distinct(*horizontal_idx))
s.add(Distinct(*vertical_idx))

def print_model (m):
    print ""
    for r in range(HEIGHT):
        for c in range(WIDTH):
            if pattern[r][c]=='*':
                sys.stdout.write(chr(m[chars[r][c]].as_long()))
            else:
                sys.stdout.write(' ')
        print ""
    print ""

    print "horizontal:"
    for i in range(len(horizontal)):
        print horizontal[i], words[m[horizontal_idx[i]].as_long()]

    print "vertical:"
    for i in range(len(vertical)):
        print vertical[i], words[m[vertical_idx[i]].as_long()]

N=10
# get 10 results:
results=[]
for i in range(N):
    if s.check() == sat:
        m = s.model()
        print_model(m)
        results.append(m)
        block = []
        for d in m:
            c=d()
            block.append(c != m[d])
        s.add(Or(block))
    else:
        print "total results", len(results)
        break


The files, including my dictionary: https://github.com/dennis714/yurichev.com/tree/master/blog/crossword_Z3.


→ [list of blog posts, my twitter/facebook]

Please drop me email about any bug(s) and suggestion(s): dennis(@)yurichev.com.