#!/usr/local/bin/python
"""Generate random sentences from an n-gram language model.

The model file holds one n-gram per line in the form
``w1 w2 ... wn<TAB>probability``. Tokens are separated by single spaces and
the empty string is the sentence-boundary token: a sentence starts in the
context of ``n - 1`` empty tokens and ends when the empty token is sampled.
"""
import sys
import math
import random
import optparse
import fileinput

# Upper bound on sampling steps per sentence, so a model that never emits the
# end-of-sentence token cannot loop forever.
MAX_SENTENCE_LENGTH = 1000


def load_model(lines):
    """Parse model lines into a conditional probability table.

    Args:
        lines: iterable of strings, each ``"w1 ... wn\\tprob"``. Blank lines
            are skipped.

    Returns:
        A pair ``(ngrams, n)``. ``ngrams`` maps each context tuple (the first
        n-1 tokens) to a dict from next word to probability. ``n`` is the
        order taken from the first n-gram read, or None if there were none.

    Raises:
        ValueError: if a non-blank line does not contain exactly one tab or
            its probability is not a number.
    """
    ngrams = {}
    n = None
    for line in lines:
        # Strip only the line terminator: leading/trailing spaces and a
        # leading tab are significant because they encode empty tokens.
        line = line.rstrip("\r\n")
        if not line:
            continue
        (ngram_text, p) = line.split("\t")
        # split(" ") rather than split(): the latter discards empty tokens,
        # which would make the boundary token "" impossible to represent.
        ngram = tuple(ngram_text.split(" "))
        if n is None:
            n = len(ngram)
        # setdefault keeps the first probability seen for a duplicate n-gram.
        ngrams.setdefault(ngram[:-1], {}).setdefault(ngram[-1], float(p))
    return ngrams, n


def generate_sentence(ngrams, n, draw=random.random,
                      max_length=MAX_SENTENCE_LENGTH):
    """Sample one sentence from the model.

    Args:
        ngrams: table as returned by ``load_model``.
        n: order of the model (context length is ``n - 1``).
        draw: zero-argument callable returning a float in [0, 1).
        max_length: maximum number of sampling steps.

    Returns:
        List of sampled words, without the end-of-sentence token. A ``"?"``
        entry marks a step where no word could be chosen.
    """
    context = ("",) * (n - 1)
    output = []
    for _ in range(max_length):
        choices = ngrams.get(context, {})
        r = draw()
        pc = 0.0
        chosen = None
        for (word, p) in choices.items():
            pc += p
            if pc >= r:
                chosen = word
                break
        if chosen is None:
            # Either the context is unknown or the probabilities sum to less
            # than the drawn value; this shouldn't happen with a sound model.
            output.append("?")
            if not choices:
                # Dead end: the context will never change, so stop here.
                break
            continue
        if chosen == "":
            break
        output.append(chosen)
        # context always has n-1 entries, so dropping the oldest one after
        # appending keeps that length for every n, including n == 1.
        context = (context + (chosen,))[1:]
    return output


def write_sentences(output_file, ngrams, n, count):
    """Write ``count`` sampled sentences to ``output_file``, one per line."""
    for _ in range(count):
        output_file.write(" ".join(generate_sentence(ngrams, n)) + "\n")


def main(argv=None):
    """Parse command-line options and run the generator.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    optparser = optparse.OptionParser(usage="%prog [options]")
    optparser.add_option('-m', '--model', dest='model_filename', type='str',
                         default=None, help='language model file')
    optparser.add_option('-o', '--output', dest='output_filename', type='str',
                         default="-", help='output file')
    optparser.add_option('-r', '--random', dest='n_random', type='int',
                         default=1,
                         help='number of random sentences to generate')
    (opts, args) = optparser.parse_args(argv)

    if opts.model_filename is None:
        sys.stderr.write("Error: must provide exactly one language model file\n")
        sys.exit(1)

    with open(opts.model_filename) as model_file:
        ngrams, n = load_model(model_file)
    if n is None:
        sys.stderr.write("Error: language model file contains no n-grams\n")
        sys.exit(1)

    if opts.output_filename == "-":
        write_sentences(sys.stdout, ngrams, n, opts.n_random)
    else:
        with open(opts.output_filename, "w") as output_file:
            write_sentences(output_file, ngrams, n, opts.n_random)


if __name__ == "__main__":
    main()