(svn r1929) Feature: [namegen] Support for dynamic generation of the Czech town names.

The static names are still used in 1/4 of cases. I think the tables for
dynamic generation must look pretty spectacular. :-) New stems are still
needed and there can be occasional glitches, please let me know.

I guess that this method of dynamic generation could be used for at least
Slovak town names, too. And possibly other Slavic languages?
replace/41b28d7194a279bdc17475d4fbe2ea6ec885a466
pasky 20 years ago
parent 4325f13f74
commit d0f9cc27a3

@ -318,7 +318,160 @@ static byte MakePolishTownName(char *buf, uint32 seed)
/**
 * Generate a Czech town name into buf.
 *
 * When SeedChance(0, 4, seed) yields 0 a ready-made name from
 * name_czech_real is copied. Otherwise the name is assembled as
 *
 *     [adjective " "] substantive [" " suffix]
 *
 * where the substantive is either a complete word from
 * name_czech_subst_full or "stem [postfix] ending" glued together from the
 * name_czech_subst_stem / _postfix / _ending tables.
 *
 * NOTE(review): SeedChance() is defined elsewhere; it is used here as if
 * SeedChance(shift, max, seed) returned a value in [0, max) - confirm.
 *
 * @param buf  Destination buffer. It is filled with strcpy()/strcat() and
 *             no length is passed in, so the caller has to provide enough
 *             room for the longest name that can be generated.
 * @param seed Seed all the random choices are derived from.
 * @return Always 0.
 */
static byte MakeCzechTownName(char *buf, uint32 seed)
{
	/* Selects whether an adjective prefix or a suffix is attached:
	 * 0..11 prefix, 12..16 suffix, 17..31 nothing.
	 * The two conditions below are mutually exclusive, so a name never
	 * gets both a prefix and a suffix. */
	int prob_tails;
	bool do_prefix, do_suffix, dynamic_subst;
	/* IDs of the respective parts */
	int prefix = 0, stem = 0, postfix = 0, ending = 0, suffix = 0;
	/* The select criteria. */
	enum CzechGender gender;
	enum CzechChoose choose;
	enum CzechAllow allow;

	/* 1:3 chance to use a real name. */
	if (SeedChance(0, 4, seed) == 0) {
		strcpy(buf, name_czech_real[SeedChance(1, lengthof(name_czech_real), seed)]);
		return 0;
	}

	/* NUL terminates the string for strcat() */
	strcpy(buf, "");

	prob_tails = SeedChance(2, 32, seed);
	do_prefix = prob_tails < 12;
	do_suffix = prob_tails > 11 && prob_tails < 17;

	if (do_prefix) prefix = SeedChance(5, lengthof(name_czech_adj), seed);
	if (do_suffix) suffix = SeedChance(7, lengthof(name_czech_suffix), seed);

	/* Pick the substantive. Indices below lengthof(name_czech_subst_full)
	 * select a complete word; the remaining 3 * lengthof(name_czech_subst_stem)
	 * indices are folded (modulo) onto the stems, so every stem is reachable
	 * through three indices while every complete word through just one. */
	stem = SeedChance(9, lengthof(name_czech_subst_full)
			+ 3 * lengthof(name_czech_subst_stem),
			seed);

	if (stem < (int) lengthof(name_czech_subst_full)) {
		/* That was easy! */
		dynamic_subst = false;
		gender = name_czech_subst_full[stem].gender;
		choose = name_czech_subst_full[stem].choose;
		allow = name_czech_subst_full[stem].allow;
	} else {
		/* Indices of the endings usable with the chosen stem. */
		unsigned int map[lengthof(name_czech_subst_ending)];
		int ending_start = -1, ending_stop = -1;
		int i;

		/* Load the substantive */
		dynamic_subst = true;
		stem -= lengthof(name_czech_subst_full);
		stem %= lengthof(name_czech_subst_stem);
		gender = name_czech_subst_stem[stem].gender;
		choose = name_czech_subst_stem[stem].choose;
		allow = name_czech_subst_stem[stem].allow;

		/* Load the postfix (1:1 chance that a postfix will be inserted).
		 * Any value >= lengthof(name_czech_subst_postfix) means "no postfix". */
		postfix = SeedChance(14, lengthof(name_czech_subst_postfix) * 2, seed);

		if (choose & CZC_POSTFIX) {
			/* Always get a real postfix. */
			postfix %= lengthof(name_czech_subst_postfix);
		}
		if (choose & CZC_NOPOSTFIX) {
			/* Always drop a postfix. */
			postfix += lengthof(name_czech_subst_postfix);
		}
		/* Record the decision so that the ending (and the adjective) can be
		 * matched against it. */
		if (postfix < (int) lengthof(name_czech_subst_postfix)) {
			choose |= CZC_POSTFIX;
		} else {
			choose |= CZC_NOPOSTFIX;
		}

		/* Localize the array segment containing a good gender: the first
		 * contiguous run of endings whose gender is acceptable. */
		for (ending = 0; ending < (int) lengthof(name_czech_subst_ending); ending++) {
			const struct CzechNameSubst *e = &name_czech_subst_ending[ending];

			if (gender == CZG_FREE
					|| (gender == CZG_NFREE && e->gender != CZG_SNEUT && e->gender != CZG_PNEUT)
					|| (gender == e->gender)) {
				if (ending_start < 0) {
					ending_start = ending;
				}
			} else if (ending_start >= 0) {
				ending_stop = ending - 1;
				break;
			}
		}
		if (ending_stop < 0) {
			/* Whoa. All the endings matched. */
			ending_stop = ending - 1;
		}
		/* Without any gender match the loop below would start at index -1. */
		assert(ending_start >= 0);

		/* Make a sequential map of the items with good mask: the ending has
		 * to provide every choose flag requested and share at least one
		 * allow flag with the stem. */
		i = 0;
		for (ending = ending_start; ending <= ending_stop; ending++) {
			const struct CzechNameSubst *e = &name_czech_subst_ending[ending];

			if ((e->choose & choose) == choose && (e->allow & allow) != 0) {
				map[i++] = ending;
			}
		}
		assert(i > 0);

		/* Load the ending */
		ending = map[SeedChance(16, i, seed)];
		/* Override possible CZG_*FREE; this must be a real gender,
		 * otherwise we get overflow when modifying the adjectivum. */
		gender = name_czech_subst_ending[ending].gender;
		assert(gender != CZG_FREE && gender != CZG_NFREE);
	}

	if (do_prefix && (name_czech_adj[prefix].choose & choose) != choose) {
		/* Throw away non-matching prefix: the adjective has to provide every
		 * choose flag the substantive asks for. */
		do_prefix = false;
	}

	/* Now finally construct the name */

	if (do_prefix) {
		enum CzechPattern pattern = name_czech_adj[prefix].pattern;
		int endpos;

		strcat(buf, name_czech_adj[prefix].name);
		/* Index of the last char of the adjective.
		 * NOTE(review): exactly one char is rewritten below, which assumes
		 * the final letter is stored as a single byte - confirm the source
		 * encoding. */
		endpos = strlen(buf) - 1;
		if (gender == CZG_SMASC && pattern == CZP_PRIVL) {
			/* -ovX -> -uv */
			buf[endpos - 2] = 'u';
			assert(buf[endpos - 1] == 'v');
			buf[endpos] = '\0';
		} else {
			buf[endpos] = name_czech_patmod[gender][pattern];
		}

		strcat(buf, " ");
	}

	if (dynamic_subst) {
		strcat(buf, name_czech_subst_stem[stem].name);
		if (postfix < (int) lengthof(name_czech_subst_postfix)) {
			int postlen, endlen;

			postlen = strlen(name_czech_subst_postfix[postfix]);
			endlen = strlen(name_czech_subst_ending[ending].name);
			/* Kill the "avava" and "Jananna"-like cases: the postfix is
			 * skipped when it would repeat a letter of the ending. The two
			 * length tests come first so that name[1] of the ending is only
			 * read when the ending has at least two chars. */
			if (2 > postlen || postlen > endlen
					|| (name_czech_subst_postfix[postfix][1]
							!= name_czech_subst_ending[ending].name[1]
						&& name_czech_subst_postfix[postfix][2]
							!= name_czech_subst_ending[ending].name[1])) {
				strcat(buf, name_czech_subst_postfix[postfix]);
			}
		}
		strcat(buf, name_czech_subst_ending[ending].name);
	} else {
		strcat(buf, name_czech_subst_full[stem].name);
	}

	if (do_suffix) {
		strcat(buf, " ");
		strcat(buf, name_czech_suffix[suffix]);
	}

	return 0;
}

@ -1659,6 +1659,228 @@ static const char *name_czech_real[] = {
"Znojmo"
};
/* The advanced hyperintelligent Czech town names generator! */
/* Grammatical gender of a substantive: three singular (CZG_S*) and three
 * plural (CZG_P*) genders, followed by two wildcards that are only valid
 * for substantive stems. The six real genders are the row indices of
 * name_czech_patmod. */
enum CzechGender {
	CZG_SMASC = 0,
	CZG_SFEM  = 1,
	CZG_SNEUT = 2,
	CZG_PMASC = 3,
	CZG_PFEM  = 4,
	CZG_PNEUT = 5,
	/* Special for substantive stems - the ending chooses the gender. */
	CZG_FREE  = 6,
	/* Like CZG_FREE, but endings of the neuter genders (CZG_SNEUT and
	 * CZG_PNEUT) are not accepted. */
	CZG_NFREE = 7
};
/* Declension pattern of an adjective; used as the column index of
 * name_czech_patmod. */
enum CzechPattern {
	CZP_JARNI = 0,
	CZP_MLADY = 1,
	CZP_PRIVL = 2
};
/* [CzechGender][CzechPattern] - replaces the last character of the adjective
 * by this.
 * Only the six real genders have a row; CZG_FREE and CZG_NFREE must have
 * been resolved to a real gender before this table is indexed.
 * Columns are in CzechPattern order: CZP_JARNI, CZP_MLADY, CZP_PRIVL. */
// XXX: [CZG_SMASC][CZP_PRIVL] needs special handling: -ovX -> -uv.
// The 'X' stored for that combination is a placeholder; MakeCzechTownName()
// rewrites the adjective tail itself instead of reading it.
static const char name_czech_patmod[6][3] = {
/* CZG_SMASC */ { 'í', 'ý', 'X' },
/* CZG_SFEM */ { 'í', 'á', 'a' },
/* CZG_SNEUT */ { 'í', 'é', 'o' },
/* CZG_PMASC */ { 'í', 'é', 'y' },
/* CZG_PFEM */ { 'í', 'é', 'y' },
/* CZG_PNEUT */ { 'í', 'á', 'a' }
};
/* Bit flags letting a substantive stem restrict which endings it accepts.
 * An ending is usable when it shares AT LEAST ONE of these flags with the
 * stem. */
enum CzechAllow {
	CZA_SHORT  = 1 << 0,
	CZA_MIDDLE = 1 << 1,
	CZA_LONG   = 1 << 2,
	/* Every flag set - compatible with anything. */
	CZA_ALL    = ~0
};
/* Bit flags a substantive stem demands from the parts combined with it.
 * ALL the flags of the stem must be present in the other part (the check
 * goes in the stem->others direction). */
enum CzechChoose {
	CZC_NORMAL    = 1 << 0,
	CZC_COLOR     = 1 << 1,
	CZC_POSTFIX   = 1 << 2, /* Matched if postfix was inserted. */
	CZC_NOPOSTFIX = 1 << 3, /* Matched if no postfix was inserted. */
	/* Every flag set - satisfies any demand. */
	CZC_ANY       = ~0
};
/* One substantive table entry; used for complete words, for stems and for
 * endings alike. */
struct CzechNameSubst {
enum CzechGender gender; // Gender of the part; CZG_FREE/CZG_NFREE appear on stems only.
enum CzechAllow allow; // Stem and ending must share at least one of these flags.
enum CzechChoose choose; // Flags a stem demands, or flags an ending provides.
const char *name; // Text of the part.
};
/* One adjective usable as a name prefix. */
struct CzechNameAdj {
enum CzechPattern pattern; // Declension pattern, selects the column of name_czech_patmod.
enum CzechChoose choose; // Flags provided; must contain all flags the substantive demands.
const char *name; // Adjective text; its last character is rewritten to fit the gender.
};
// Adjectives used as the optional name prefix.
// Some of the items which should be common are doubled.
// The last character of each name is replaced according to the gender of
// the substantive (see name_czech_patmod), so only the part before it matters;
// CZP_PRIVL names must end in "-ovX" for the special -ovX -> -uv rewrite.
// NOTE(review): MakeCzechTownName() keeps a prefix only if
// (adj.choose & subst.choose) == subst.choose. Every entry of
// name_czech_subst_full and name_czech_subst_stem sets CZC_NORMAL, which the
// CZC_COLOR adjectives below do not provide, so they look like they are
// always thrown away - confirm whether that is intended.
static const struct CzechNameAdj name_czech_adj[] = {
{ CZP_JARNI, CZC_ANY, "Horní" },
{ CZP_JARNI, CZC_ANY, "Horní" },
{ CZP_JARNI, CZC_ANY, "Dolní" },
{ CZP_JARNI, CZC_ANY, "Dolní" },
{ CZP_JARNI, CZC_ANY, "Prední" },
{ CZP_JARNI, CZC_ANY, "Zadní" },
{ CZP_JARNI, CZC_ANY, "Kostelní" },
{ CZP_JARNI, CZC_ANY, "Havraní" },
{ CZP_JARNI, CZC_ANY, "Rícní" },
{ CZP_MLADY, CZC_ANY, "Velký" },
{ CZP_MLADY, CZC_ANY, "Velký" },
{ CZP_MLADY, CZC_ANY, "Malý" },
{ CZP_MLADY, CZC_ANY, "Malý" },
{ CZP_MLADY, CZC_ANY, "Vysoký" },
{ CZP_MLADY, CZC_ANY, "Ceský" },
{ CZP_MLADY, CZC_ANY, "Moravský" },
{ CZP_MLADY, CZC_ANY, "Slovácký" },
{ CZP_MLADY, CZC_ANY, "Uherský" },
{ CZP_MLADY, CZC_ANY, "Starý" },
{ CZP_MLADY, CZC_ANY, "Starý" },
{ CZP_MLADY, CZC_ANY, "Nový" },
{ CZP_MLADY, CZC_ANY, "Nový" },
{ CZP_MLADY, CZC_ANY, "Mladý" },
{ CZP_MLADY, CZC_ANY, "Královský" },
{ CZP_MLADY, CZC_ANY, "Kamenný" },
{ CZP_MLADY, CZC_ANY, "Cihlový" },
{ CZP_MLADY, CZC_ANY, "Divný" },
{ CZP_MLADY, CZC_COLOR, "Cervená" },
{ CZP_MLADY, CZC_COLOR, "Cervená" },
{ CZP_MLADY, CZC_COLOR, "Zelená" },
{ CZP_MLADY, CZC_COLOR, "Zlutá" },
{ CZP_MLADY, CZC_COLOR, "Sivá" },
{ CZP_MLADY, CZC_COLOR, "Sedá" },
{ CZP_MLADY, CZC_COLOR, "Bílá" },
{ CZP_MLADY, CZC_COLOR, "Modrá" },
{ CZP_MLADY, CZC_COLOR, "Ruzová" },
{ CZP_MLADY, CZC_COLOR, "Cerná" },
{ CZP_PRIVL, CZC_ANY, "Králova" },
{ CZP_PRIVL, CZC_ANY, "Janova" },
{ CZP_PRIVL, CZC_ANY, "Karlova" },
{ CZP_PRIVL, CZC_ANY, "Jiríkova" },
{ CZP_PRIVL, CZC_ANY, "Petrova" },
{ CZP_PRIVL, CZC_ANY, "Sudovo" },
};
// Complete substantives, used as they are (no postfix or ending is added).
// Considered a stem for choose/allow matching purposes.
// Each entry needs a real gender (not CZG_FREE/CZG_NFREE) because the gender
// is used directly to index name_czech_patmod. Some entries are listed twice.
static const struct CzechNameSubst name_czech_subst_full[] = {
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Sedlec" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL, "Úval" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hora" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hlava" },
{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Zdár" },
{ CZG_PMASC, CZA_ALL, CZC_NORMAL, "Úvaly" },
{ CZG_PFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Luka" },
{ CZG_PNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
};
// Substantive stems; an optional postfix and an ending are appended to them.
// gender restricts the endings by gender (CZG_FREE: any, CZG_NFREE: any but
// the neuter ones), allow must share a flag with the ending and choose lists
// the flags the ending and the adjective have to provide.
// TODO: More stems needed. --pasky
static const struct CzechNameSubst name_czech_subst_stem[] = {
{ CZG_SMASC, CZA_MIDDLE, CZC_NORMAL | CZC_COLOR, "Kostel" },
{ CZG_SMASC, CZA_MIDDLE, CZC_NORMAL | CZC_COLOR, "Kláster" },
{ CZG_SMASC, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Lhot" },
{ CZG_SFEM, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Lhot" },
{ CZG_SFEM, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Hur" },
{ CZG_FREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Sedl" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hrad" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Pras" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Baz" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Tes" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Uz" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Br" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Vod" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Jan" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Prach" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Kunr" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Strak" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Vit" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Vys" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Zat" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Zer" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Stred" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Harv" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Pruh" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Tach" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Písn" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jin" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jes" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jar" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Sok" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Hod" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Net" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Praz" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Nerat" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Kral" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL | CZC_NOPOSTFIX, "Pan" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Odstred" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Mrat" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hlav" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE, CZC_NORMAL, "Mer" },
};
/* Postfixes that may be inserted between a substantive stem and its ending.
 * Every entry is two characters long: a vowel (a, o, e) followed by one of
 * v, n, t. */
static const char *name_czech_subst_postfix[] = {
	/* a- */
	"av",
	"an",
	"at",
	/* o- */
	"ov",
	"on",
	"ot",
	/* e- */
	"ev",
	"en",
	"et"
};
// Endings appended to a substantive stem; the ending decides the final gender.
// The order of the entries matters: MakeCzechTownName() only looks at the
// first contiguous run of entries whose gender suits the stem, so all the
// entries of one gender have to stay together.
// This array must have the both neutral genders at the end! (Otherwise the
// run accepted by a CZG_NFREE stem would be cut short.)
static const struct CzechNameSubst name_czech_subst_ending[] = {
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "ec" },
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "ín" },
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ov" },
{ CZG_SMASC, CZA_SHORT | CZA_LONG, CZC_ANY, "kov" },
{ CZG_SMASC, CZA_LONG, CZC_POSTFIX, "ín" },
{ CZG_SMASC, CZA_LONG, CZC_POSTFIX, "ník" },
{ CZG_SFEM, CZA_SHORT, CZC_ANY, "ka" },
{ CZG_SFEM, CZA_MIDDLE, CZC_ANY, "inka" },
{ CZG_SFEM, CZA_MIDDLE, CZC_NOPOSTFIX, "na" },
{ CZG_SFEM, CZA_MIDDLE, CZC_ANY, "" },
{ CZG_SFEM, CZA_LONG, CZC_ANY, "ava" },
{ CZG_PMASC, CZA_LONG, CZC_ANY, "íky" },
{ CZG_PMASC, CZA_LONG, CZC_ANY, "upy" },
{ CZG_PFEM, CZA_LONG, CZC_ANY, "avy" },
{ CZG_PFEM, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ice" },
{ CZG_PNEUT, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "na" },
{ CZG_SNEUT, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "no" },
{ CZG_SNEUT, CZA_LONG, CZC_ANY, "iste" },
};
// Optional suffixes; when one is selected MakeCzechTownName() appends it to
// the name after a single space.
static const char *name_czech_suffix[] = {
"nad Cydlinou",
"nad Dyjí",
"nad Jihlavou",
"nad Labem",
"nad Lesy",
"nad Moravou",
"nad Nisou",
"nad Odrou",
"nad Ostravicí",
"nad Sázavou",
"nad Vltavou",
"pod Pradedem",
"pod Radhostem",
"pod Rípem",
"pod Snezkou",
"pod Spicákem",
"pod Sedlem",
};
static const char *name_romanian_real[]= {
"Adjud",
"Alba Iulia",

Loading…
Cancel
Save