extensions/ConfirmEdit/captcha-old.py

   1 #!/usr/bin/python
   2 #
   3 # Script to generate distorted text images for a captcha system.
   4 #
   5 # Copyright (C) 2005 Neil Harris
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License along
  18 # with this program; if not, write to the Free Software Foundation, Inc.,
  19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 # http://www.gnu.org/copyleft/gpl.html
  21 #
  22 # Further tweaks by Brion Vibber <brion@pobox.com>:
  23 # 2006-01-26: Add command-line options for the various parameters
  24 # 2007-02-19: Add --dirs param for hash subdirectory splits
  25 # Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
  26 # 2008-01-06: Add regex check to skip words containing other than a-z
  27
  28 import random
  29 import math
  30 import hashlib
  31 from optparse import OptionParser
  32 import os
  33 import sys
  34 import re
  35 import multiprocessing
  36 import time
  37
  38 try:
  39         from PIL import Image
  40         from PIL import ImageFont
  41         from PIL import ImageDraw
  42         from PIL import ImageEnhance
  43         from PIL import ImageOps
  44 except:
  45         sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
  46
  47 nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
  48
  49 # Does X-axis wobbly copy, sandwiched between two rotates
  50 def wobbly_copy(src, wob, col, scale, ang):
  51         x, y = src.size
  52         f = random.uniform(4*scale, 5*scale)
  53         p = random.uniform(0, math.pi*2)
  54         rr = ang+random.uniform(-30, 30) # vary, but not too much
  55         int_d = Image.new('RGB', src.size, 0) # a black rectangle
  56         rot = src.rotate(rr, Image.BILINEAR)
  57         # Do a cheap bounding-box op here to try to limit work below
  58         bbx = rot.getbbox()
  59         if bbx == None:
  60                 return src
  61         else:
  62                 l, t, r, b= bbx
  63         # and only do lines with content on
  64         for i in range(t, b+1):
  65                 # Drop a scan line in
  66                 xoff = int(math.sin(p+(i*f/y))*wob)
  67                 xoff += int(random.uniform(-wob*0.5, wob*0.5))
  68                 int_d.paste(rot.crop((0, i, x, i+1)), (xoff, i))
  69         # try to stop blurring from building up
  70         int_d = int_d.rotate(-rr, Image.BILINEAR)
  71         enh = ImageEnhance.Sharpness(int_d)
  72         return enh.enhance(2)
  73
  74
  75 def gen_captcha(text, fontname, fontsize, file_name):
  76         """Generate a captcha image"""
  77         # white text on a black background
  78         bgcolor = 0x0
  79         fgcolor = 0xffffff
  80         # create a font object
  81         font = ImageFont.truetype(fontname,fontsize)
  82         # determine dimensions of the text
  83         dim = font.getsize(text)
  84         # create a new image significantly larger that the text
  85         edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
  86         im = Image.new('RGB', (edge, edge), bgcolor)
  87         d = ImageDraw.Draw(im)
  88         x, y = im.size
  89         # add the text to the image
  90         d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
  91         k = 3
  92         wob = 0.20*dim[1]/k
  93         rot = 45
  94         # Apply lots of small stirring operations, rather than a few large ones
  95         # in order to get some uniformity of treatment, whilst
  96         # maintaining randomness
  97         for i in range(k):
  98                 im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
  99                 im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
 100                 im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
 101                 rot += 30
 102
 103         # now get the bounding box of the nonzero parts of the image
 104         bbox = im.getbbox()
 105         bord = min(dim[0], dim[1])/4 # a bit of a border
 106         im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
 107         # and turn into black on white
 108         im = ImageOps.invert(im)
 109
 110         # save the image, in format determined from filename
 111         im.save(file_name)
 112
 113 def gen_subdir(basedir, md5hash, levels):
 114         """Generate a subdirectory path out of the first _levels_
 115         characters of _hash_, and ensure the directories exist
 116         under _basedir_."""
 117         subdir = None
 118         for i in range(0, levels):
 119                 char = md5hash[i]
 120                 if subdir:
 121                         subdir = os.path.join(subdir, char)
 122                 else:
 123                         subdir = char
 124                 fulldir = os.path.join(basedir, subdir)
 125                 if not os.path.exists(fulldir):
 126                         os.mkdir(fulldir)
 127         return subdir
 128
 129 def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
 130         if words is not None:
 131                 word = words[random.randint(0,len(words)-1)]
 132                 while nwords > 1:
 133                         word2 = words[random.randint(0,len(words)-1)]
 134                         word = word + word2
 135                         nwords = nwords - 1
 136         else:
 137                 word = ''
 138                 max_length = max_length if max_length > 0 else 10
 139                 for i in range(0, random.randint(min_length, max_length)):
 140                         word = word + chr(97 + random.randint(0,25))
 141
 142         if verbose:
 143                 print("word is %s" % word)
 144
 145         if len(word) < min_length:
 146                 if verbose:
 147                         print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
 148                 return None
 149
 150         if max_length > 0 and len(word) > max_length:
 151                 if verbose:
 152                         print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
 153                 return None
 154
 155         if nonalpha.search(word):
 156                 if verbose:
 157                         print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
 158                 return None
 159
 160         for naughty in blacklist:
 161                 if naughty in word:
 162                         if verbose:
 163                                 print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
 164                         return None
 165         return word
 166
 167 def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
 168         for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
 169                 word = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
 170                 if word:
 171                         return word
 172         sys.exit("Unable to find valid word combinations")
 173
 174 def read_wordlist(filename):
 175         f = open(filename)
 176         words = [x.strip().lower() for x in f.readlines()]
 177         f.close()
 178         return words
 179
 180 def run_in_thread(object):
 181         count = object[0];
 182         words = object[1]
 183         blacklist = object[2]
 184         opts = object[3]
 185         font = object[4]
 186         fontsize = object[5]
 187
 188         for i in range(count):
 189                 word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
 190                 salt = "%08x" % random.randrange(2**32)
 191                 # 64 bits of hash is plenty for this purpose
 192                 md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
 193                 filename = "image_%s_%s.png" % (salt, md5hash)
 194                 if dirs:
 195                         subdir = gen_subdir(output, md5hash, dirs)
 196                         filename = os.path.join(subdir, filename)
 197                 if verbose:
 198                         print(filename)
 199                 gen_captcha(word, font, fontsize, os.path.join(output, filename))
 200
 201 if __name__ == '__main__':
 202         """This grabs random words from the dictionary 'words' (one
 203         word per line) and generates a captcha image for each one,
 204         with a keyed salted hash of the correct answer in the filename.
 205
 206         To check a reply, hash it in the same way with the same salt and
 207         secret key, then compare with the hash value given.
 208         """
 209         script_dir = os.path.dirname(os.path.realpath(__file__))
 210         parser = OptionParser()
 211         parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
 212         parser.add_option("--random", help="Use random charcters instead of a wordlist", action="store_true")
 213         parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
 214         parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
 215         parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
 216         parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
 217         parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
 218         parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
 219         parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
 220         parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
 221         parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
 222         parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
 223         parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
 224         parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
 225         parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)
 226
 227         opts, args = parser.parse_args()
 228
 229         if opts.wordlist:
 230                 wordlist = opts.wordlist
 231         elif opts.random:
 232                 wordlist = None
 233         else:
 234                 sys.exit("Need to specify a wordlist")
 235         if opts.key:
 236                 key = opts.key
 237         else:
 238                 sys.exit("Need to specify a key")
 239         if opts.output:
 240                 output = opts.output
 241         else:
 242                 sys.exit("Need to specify an output directory")
 243         if opts.font and os.path.exists(opts.font):
 244                 font = opts.font
 245         else:
 246                 sys.exit("Need to specify the location of a font")
 247
 248         blacklist = read_wordlist(opts.blacklist)
 249         count = opts.count
 250         fill = opts.fill
 251         dirs = opts.dirs
 252         verbose = opts.verbose
 253         fontsize = opts.font_size
 254         threads = opts.threads
 255
 256         if fill:
 257                 count = max(0, fill - len(os.listdir(output)))
 258
 259         words = None
 260         if wordlist:
 261                 words = read_wordlist(wordlist)
 262                 words = [x for x in words
 263                         if len(x) in (4,5) and x[0] != "f"
 264                         and x[0] != x[1] and x[-1] != x[-2]]
 265
 266         if count == 0:
 267                 sys.exit("No need to generate CAPTCHA images.")
 268
 269         if count < threads:
 270                 chunks = 1
 271                 threads = 1
 272         else:
 273                 chunks = int(count / threads)
 274
 275         p = multiprocessing.Pool(threads);
 276         data = []
 277         print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
 278         for i in range(0, threads):
 279                 data.append([chunks, words, blacklist, opts, font, fontsize])
 280
 281         p.map(run_in_thread, data)
 282