Package schlachtfeld :: Package namegen :: Package pycfnamegen :: Module cfnamegen
[hide private]
[frames] | no frames]

Source Code for Module schlachtfeld.namegen.pycfnamegen.cfnamegen

  1  # Context-free grammar random name generator 
  2  # Jeremy Thurgood <firxen@gmail.com> 
  3  # Highly experimental at present, but sort of working 
  4   
  5  import random 
  6  import re 
  7  import sys 
  8  import yaml 
  9   
 10   
class GrammarError(RuntimeError):
    """Raised when a grammar object does not have the expected structure."""
13
class CFNameGen(object):
    """Random name generator driven by a context-free grammar.

    The grammar is a dict that maps each rule (non-terminal) name to a list
    of option strings.  Inside an option, ``<symbol>`` refers to the rule
    called ``symbol``; all other text is emitted literally.  Generation
    starts from the rule called ``"name"``.
    """

    # This should be done using gettext for i18n, but I can't be bothered to
    # figure out how to do it properly, so I'm using replacement strings for
    # now.
    stringUndefinedNonTerminal = "Undefined non-terminal \"%(undefinedNonTerminal)s\" in rule \"%(rule)s\"."

    # Regular expression to catch non-terminals.  It is used frequently, so
    # it is compiled once and shared by every instance.
    reNonTerminal = re.compile(r"<(\w+)>")

    def __init__(self, nameGrammar):
        """Create a namegen object.

        We take a grammar dict, as before the Great Refactoring.

        Raises GrammarError (via checkTypes) if the grammar does not have
        the expected data types.
        """
        self.checkTypes(nameGrammar)
        self.grammar = nameGrammar

    # checkTypes() is only useful while testing with internally specified
    # grammars.  Once we're parsing an external file it becomes unnecessary
    # since we generate the data types ourselves instead of asking a human to
    # do it.  As such, error strings are hardcoded.  Anyone who sees them
    # would be messing around in here anyway.
    def checkTypes(self, nameGrammar):
        """Check given grammar object for correct datatypes.

        This function is only really necessary while the grammar's still
        being specified in here.  It will likely disappear when we parse the
        grammar from a data file.

        Raises GrammarError if the grammar is not a dict, if any rule's
        right-hand side is not a list, or if any option is not a str.
        """
        if not isinstance(nameGrammar, dict):
            raise GrammarError("Grammar data is not a dictionary!")
        for rule, rhs in nameGrammar.items():
            if not isinstance(rhs, list):
                raise GrammarError("Rule \"%s\" is not a list!" % rule)
            for option in rhs:
                if not isinstance(option, str):
                    raise GrammarError("Rule \"%s\" does not contain only strings!" % rule)

    # Grammar verification stuff follows.  We can probably make this throw
    # warnings and correct problems, but that's a job for another day.
    # Incorrect grammars probably won't provide useful output anyway.  If this
    # stuff gets big enough it may be pushed into its own module.

    def checkUndefinedNonTerminals(self, nameGrammar):
        """Check given grammar for undefined non-terminals.

        An undefined non-terminal is a non-terminal symbol used in a symbol
        definition that has no definition of its own and cannot therefore be
        expanded.  Undefined non-terminals can lead to ugly error messages
        instead of beautifully generated names.

        Returns None when every referenced non-terminal is defined.
        Otherwise returns a dict describing the first problem found, with
        the keys "undefinedNonTerminal" and "rule"; it is suitable for
        formatting with stringUndefinedNonTerminal.
        """
        for rule, rhs in nameGrammar.items():
            for option in rhs:
                tempStr = option
                matchNonTerminal = self.reNonTerminal.search(tempStr)
                while matchNonTerminal:
                    if matchNonTerminal.group(1) not in nameGrammar:
                        return {"undefinedNonTerminal": matchNonTerminal.group(1), "rule": rule}
                    # Drop the symbol just checked and look for the next one.
                    tempStr = self.reNonTerminal.sub("", tempStr, 1)
                    matchNonTerminal = self.reNonTerminal.search(tempStr)
        return None

    def checkUnproductiveNonTerminals(self, nameGrammar):
        """Check grammar for possibly unproductive non-terminals.

        An unproductive non-terminal is a non-terminal symbol that cannot be
        converted to a terminal symbol in the given grammar.  A good example
        of this is a non-terminal symbol that includes itself in its
        definition.

        The check is deliberately conservative: all options of a rule are
        joined together, so a rule only counts as productive once *every*
        non-terminal mentioned in *any* of its options is productive.  A
        rule that refers to itself in one option but also has a purely
        terminal option is therefore still reported.

        This function is currently very basic and should be extended
        (rewritten?) to allow warnings for _possible_ unproductive
        non-terminals and errors for _definite_ unproductive non-terminals.
        Volunteers?

        Returns a sorted list of the rule names that could not be shown to
        be productive (an empty list when there are none).

        XXX: INCOMPLETE
        """
        grammarUnchecked = dict((rule, "".join(rhs)) for (rule, rhs) in nameGrammar.items())
        grammarProductive = set()
        finished = False
        while not finished:
            finished = True
            # Iterate over a snapshot of the keys: entries are deleted from
            # grammarUnchecked inside the loop.
            for rule in list(grammarUnchecked):
                rhs = grammarUnchecked[rule]
                matchNonTerminal = self.reNonTerminal.search(rhs)
                # Strip leading references to rules already known to be
                # productive; stop at the first one that is not.
                while matchNonTerminal and matchNonTerminal.group(1) in grammarProductive:
                    rhs = rhs.replace(matchNonTerminal.group(0), "")
                    matchNonTerminal = self.reNonTerminal.search(rhs)
                if matchNonTerminal:
                    grammarUnchecked[rule] = rhs
                else:
                    # Nothing left to expand, so the rule is productive.
                    # Another pass is needed because other rules may now
                    # resolve.
                    grammarProductive.add(rule)
                    del grammarUnchecked[rule]
                    finished = False
        return sorted(grammarUnchecked)

    # More grammar checking functions to come:
    #   Unused non-terminals
    #   Loop detection would be nice, but currently a little impractical.

    def checkUnusedNonTerminals(self, nameGrammar):
        """Check grammar for non-terminals that can never be reached.

        While unused non-terminals are irrelevant in the generation of
        sentences, their presence usually implies an error in the grammar.

        Not implemented yet: currently does nothing and returns None.

        XXX: INCOMPLETE
        """

    # verifyGrammar() uses the above functions to verify the correctness of a
    # grammar.  This isn't perfect, but it should catch the most common
    # problems.
    def verifyGrammar(self):
        """Verify the grammar held by this generator.

        Returns a human-readable error string for the first problem found
        (an undefined non-terminal, or a missing "name" rule), or None when
        no problem was detected.
        """
        error = self.checkUndefinedNonTerminals(self.grammar)
        if error:
            return self.stringUndefinedNonTerminal % error
        if "name" not in self.grammar:
            return "Rule \"name\" not present!"
        return None

    # Now to the meat of the problem, which is actually almost trivial thanks
    # to the dictionary data type.  I love python ;-)

    def getName(self):
        """Generate and return one random name.

        Starts from a random option of the "name" rule and keeps replacing
        the leftmost non-terminal with a random option of its rule until no
        non-terminal is left.

        Raises KeyError if "name" or a referenced non-terminal is not in the
        grammar (run verifyGrammar() first to catch this).  The loop only
        ends once no non-terminal remains, so a rule that always expands to
        itself never terminates.
        """
        nameStr = random.choice(self.grammar["name"])
        matchNonTerminal = self.reNonTerminal.search(nameStr)
        while matchNonTerminal:
            subStr = random.choice(self.grammar[matchNonTerminal.group(1)])
            # Splice by position instead of using re.sub(): sub() treats the
            # replacement as a template, so a backslash in a terminal would
            # be read as an escape or group reference.
            nameStr = nameStr[:matchNonTerminal.start()] + subStr + nameStr[matchNonTerminal.end():]
            matchNonTerminal = self.reNonTerminal.search(nameStr)
        return nameStr
if __name__ == "__main__":
    # Main body
    # Test grammar -- will be read from a file when I decide how to do it
    # properly with minimum effort (for the user and the code)
    orkGrammar = {
        "name": ["<nameStart><nameMiddle0to3><nameEnd>"],
        "nameMiddle0to3": ["", "<nameMiddle>", "<nameMiddle><nameMiddle>", "<nameMiddle><nameMiddle><nameMiddle>"],
        "nameStart": ["<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsVowel>"],
        "nameMiddle": ["<nmCons><nmVowel>"],
        "nameEnd": ["<neCons><neVowel>", "<neCons>", "<neCons>"],
        "nsCons": ["D", "G", "K", "T", "Gr"],
        "nmCons": ["d", "g", "k", "t", "r", "s", "z", "kt", "rs", "gr"],
        "neCons": ["r", "s", "z"],
        "nsVowel": ["E", "U"],
        "nmVowel": ["a", "e", "i", "o", "u"],
        "neVowel": ["a", "u"]
    }

    fooGrammar = {
        "name": ["<nameStart><nameMiddle0to2><nameEnd>"],
        "nameMiddle0to2": ["", "<nameMiddle>", "<nameMiddle><nameMiddle>"],
        "nameStart": ["<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsVowel>"],
        "nameMiddle": ["<nmCons><nmVowel>"],
        "nameEnd": ["<neCons><neVowel>", "<neCons>", "<neCons>"],
        "nsCons": ["J", "M", "P", "N", "Y", "D", "F"],
        "nmCons": ["l", "m", "lm", "th", "r", "s", "ss", "p", "f", "mb", "b", "lb", "d", "lf"],
        "neCons": ["r", "n", "m", "s", "y", "l", "th", "b", "lb", "f", "lf"],
        "nsVowel": ["A", "Au", "Ei"],
        "nmVowel": ["a", "e", "i", "o", "u", "au", "oa", "ei"],
        "neVowel": ["e", "i", "a", "au"]
    }

    # A single-argument print(...) emits the same text on Python 2 and 3.
    print("Ork Grammar: " + yaml.dump(orkGrammar))
    print("Foo Grammar: " + yaml.dump(fooGrammar))

    # Verify each grammar and print one generated name: foo first, then ork.
    for grammar in (fooGrammar, orkGrammar):
        generator = CFNameGen(grammar)
        errorStr = generator.verifyGrammar()
        if errorStr:
            # Exit with the error message; sys.exit() writes it to stderr.
            sys.exit(errorStr)
        print(generator.getName())