# Script to generate distorted text images for a captcha system.
#
# Copyright (C) 2005 Neil Harris
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Further tweaks by Brion Vibber <brion@pobox.com>:
# 2006-01-26: Add command-line options for the various parameters
# 2007-02-19: Add --dirs param for hash subdirectory splits
# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
# 2008-01-06: Add regex check to skip words containing other than a-z
import hashlib
import math
import multiprocessing
import os
import random
import re
import sys
from optparse import OptionParser

try:
    from PIL import Image
    from PIL import ImageDraw
    from PIL import ImageEnhance
    from PIL import ImageFont
    from PIL import ImageOps
except ImportError:
    sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
# Matches any character outside lowercase a-z; a hit means the candidate
# word is unsuitable for a captcha.
nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
def wobbly_copy(src, wob, col, scale, ang):
    """Return an X-axis "wobbly" copy of *src*, sandwiched between two rotates.

    The image is rotated by roughly *ang* degrees, each scan line that has
    content is pasted into a black image with a horizontal offset (a sine
    wave scaled by *wob* plus random jitter of up to half of *wob*), and the
    result is rotated back by the same angle and sharpened.

    src   -- PIL image to distort
    wob   -- amplitude of the horizontal displacement
    col   -- not referenced by any statement shown for this function; kept
             so existing callers keep working
    scale -- scales the sine frequency, which is drawn from
             [4*scale, 5*scale]
    ang   -- base rotation angle; a random value in [-30, 30] is added
    """
    # NOTE(review): the listing this block was restored from did not show
    # the binding of x/y, the bounding-box unpacking or the return
    # statement. They are reconstructed from how the names are used below;
    # confirm against the canonical script.
    x, y = src.size
    f = random.uniform(4*scale, 5*scale)
    p = random.uniform(0, math.pi*2)
    rr = ang+random.uniform(-30, 30)  # vary, but not too much
    int_d = Image.new('RGB', src.size, 0)  # a black rectangle
    rot = src.rotate(rr, Image.BILINEAR)
    # Do a cheap bounding-box op here to try to limit work below
    bbx = rot.getbbox()
    if bbx is None:
        # The rotated image is entirely black: there is nothing to wobble.
        return src
    _, t, _, b = bbx
    # and only do lines with content on
    for i in range(t, b+1):
        # Drop one scan line in, displaced horizontally
        xoff = int(math.sin(p+(i*f/y))*wob)
        xoff += int(random.uniform(-wob*0.5, wob*0.5))
        int_d.paste(rot.crop((0, i, x, i+1)), (xoff, i))
    # try to stop blurring from building up
    int_d = int_d.rotate(-rr, Image.BILINEAR)
    enh = ImageEnhance.Sharpness(int_d)
    # NOTE(review): the sharpening factor was not visible in the listing;
    # 2 is an assumption -- TODO confirm.
    return enh.enhance(2)
def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image of *text* and save it to *file_name*.

    The text is drawn in white on a black square canvas, stirred by
    repeated wobbly_copy() passes, cropped to its content plus a small
    border, inverted to black on white and saved.

    text      -- the string to render
    fontname  -- path of the TrueType font passed to ImageFont.truetype()
    fontsize  -- font size passed to ImageFont.truetype()
    file_name -- output path; the image format is determined from it
    """
    # NOTE(review): the listing this block was restored from did not show
    # the colour constants, the binding of x/y, the stirring parameters
    # (k, wob, rot and its per-pass increment), the bounding-box lookup or
    # the save call. Colours follow the comment below; the numeric stirring
    # values are assumptions -- TODO confirm against the canonical script.

    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # determine dimensions of the text
    if hasattr(font, 'getbbox'):
        # FreeTypeFont.getsize() was removed in Pillow 10; derive the size
        # from getbbox() when it is available.
        # NOTE(review): assumes right-left / bottom is close enough to what
        # getsize() reported; the value only sizes and centres the canvas.
        left, _top, right, bottom = font.getbbox(text)
        dim = (right - left, bottom)
    else:
        dim = font.getsize(text)
    # create a new image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image, centred
    d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
    k = 2               # assumed number of stirring rounds
    wob = 0.09*dim[1]   # assumed wobble amplitude, relative to text height
    rot = 45            # assumed starting rotation
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
        im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
        im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
        rot += 30       # assumed per-round rotation step

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    if bbox is not None:
        bord = min(dim[0], dim[1])/4  # a bit of a border
        im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
def gen_subdir(basedir, md5hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _md5hash_, and ensure the directories exist
    under _basedir_.

    Each of the leading characters becomes one directory level, e.g.
    ("out", "abcdef", 2) creates "out/a/b". The path relative to _basedir_
    is returned (the caller joins it to the output directory itself); it is
    the empty string when _levels_ is not positive.
    """
    chars = md5hash[:max(levels, 0)]
    subdir = os.path.join(*chars) if chars else ""
    # exist_ok avoids the check-then-create race when several worker
    # processes need the same directory at the same time.
    os.makedirs(os.path.join(basedir, subdir), exist_ok=True)
    return subdir
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make one attempt at producing a captcha word.

    words      -- list of candidate words, or None to build a string of
                  random lowercase letters instead
    blacklist  -- iterable of substrings the result must not contain
    verbose    -- when true, print the candidate and any rejection reason
    nwords     -- number of words from *words* concatenated into a result
    min_length -- minimum accepted length
    max_length -- maximum accepted length; values <= 0 mean "no limit" for
                  word lists and 10 for random strings

    Returns the candidate string, or None when it was rejected.
    """
    # NOTE(review): the listing this block was restored from did not show
    # the word-concatenation loop, the else branch, the verbose guards, the
    # blacklist test or the return statements; they are reconstructed from
    # the visible statements and messages.
    if words is not None:
        if not words:
            # An empty list can never yield a word; let the caller give up.
            return None
        word = ''.join(random.choice(words) for _ in range(max(nwords, 1)))
    else:
        max_length = max_length if max_length > 0 else 10
        if min_length > max_length:
            # randint() would raise on an empty range.
            return None
        length = random.randint(min_length, max_length)
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        # Skip empty entries: '' is a substring of every word.
        if naughty and naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None

    return word
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Return a word accepted by try_pick_word(), retrying up to 1000 times.

    All arguments are passed through to try_pick_word() unchanged. Exits
    the process with an error message when no attempt succeeds.
    """
    for _ in range(1000):  # If we can't find a valid combination in 1000 tries, just give up
        word = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
        if word:
            return word
    sys.exit("Unable to find valid word combinations")
def read_wordlist(filename):
    """Read *filename* (one word per line) into a list of words.

    Each line is stripped of surrounding whitespace and lower-cased. Blank
    lines are skipped, so the result never contains an empty string (an
    empty blacklist entry would otherwise match every candidate word).
    """
    with open(filename) as f:
        words = [line.strip().lower() for line in f]
    return [word for word in words if word]
def run_in_thread(object):
    """Worker entry point: generate one chunk of captcha images.

    *object* is the six-item work description built by the main script:
    [count, words, blacklist, opts, font, fontsize]. For each of *count*
    images a word is picked, a random salt is drawn, and the image is saved
    as image_<salt>_<hash>.png, where <hash> is the first 16 hex digits of
    the MD5 of key+salt+word+key+salt.

    The key, output directory, directory depth and verbosity are read from
    *opts* rather than from module globals, so the worker also works when
    the process was started with the "spawn" method and the globals set
    under ``if __name__ == '__main__'`` do not exist in it.
    """
    # NOTE(review): the listing this block was restored from showed only
    # the blacklist unpacking; the remaining unpacking, the --dirs guard
    # and the verbose print of the file name are reconstructed.
    count, words, blacklist, opts, font, fontsize = object
    key = opts.key
    output = opts.output
    dirs = opts.dirs
    verbose = opts.verbose

    for _ in range(count):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
if __name__ == '__main__':
    # This grabs random words from the dictionary 'words' (one
    # word per line) and generates a captcha image for each one,
    # with a keyed salted hash of the correct answer in the filename.
    #
    # To check a reply, hash it in the same way with the same salt and
    # secret key, then compare with the hash value given.
    #
    # NOTE(review): the listing this block was restored from showed the
    # error messages but not the branches that guard them, nor the
    # count/fill/dirs assignments and the small-count handling; those are
    # reconstructed from the visible statements.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
    parser.add_option("--random", help="Use random characters instead of a wordlist", action="store_true")
    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
    parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
    parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
    parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
    parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)

    opts, args = parser.parse_args()

    # Validate the required options.
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if opts.key:
        key = opts.key
    else:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    blacklist = read_wordlist(opts.blacklist)
    count = opts.count
    fill = opts.fill
    dirs = opts.dirs
    verbose = opts.verbose
    fontsize = opts.font_size
    threads = opts.threads

    if fill:
        # Only top up the output directory to the requested number of files.
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Keep 4-5 letter words that do not start with "f" and have no
        # doubled letter at either end.
        words = [x for x in words
                 if len(x) in (4,5) and x[0] != "f"
                 and x[0] != x[1] and x[-1] != x[-2]]

    if count < 1:
        sys.exit("No need to generate CAPTCHA images.")

    # Never start more workers than there are images, and never fewer than
    # one; otherwise every chunk would be empty.
    threads = max(1, min(threads, count))
    # --count is documented as a maximum, so any remainder is dropped.
    chunks = count // threads

    print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
    data = [[chunks, words, blacklist, opts, font, fontsize] for _ in range(threads)]

    # map() blocks until every chunk is done; the context manager then
    # shuts the pool down instead of leaking the worker processes.
    with multiprocessing.Pool(threads) as p:
        p.map(run_in_thread, data)