From e76aa0fda8608927f263264e1bf850414530b8b6 Mon Sep 17 00:00:00 2001
From: Torne Wuff
Date: Wed, 22 Jun 2011 10:54:02 +0100
Subject: [PATCH] Make genlang faster by doing better regexes.

With this change generating all languages takes only two-thirds the time.

It changes the acceptable syntax for target wildcards in language files,
however: instead of a comma-separated list of glob-style wildcards it requires
that it be a comma-separated list of prefix matches, i.e. the * can only appear
at the end of each wildcard, and ? cannot be used. This does not require any
changes to existing language files as they are all already in this form.
---
 tools/genlang |   37 +++++++++++++++++++++----------------
 1 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/tools/genlang b/tools/genlang
index c8ccbd1..cc67a56 100755
--- a/tools/genlang
+++ b/tools/genlang
@@ -121,7 +121,23 @@ if(!$target && !$update && !$sortfile) {
     print STDERR "Please specify a target (with -t)!\n";
     exit;
 }
-my @target_parts = split ':', $target;
+
+# Build up a regex which can be applied to target wildcard lists. We only need
+# to support prefix matches, so a target parameter of foo:bar can be expanded
+# to the regex "\*|f\*|fo\*|foo|b\*|ba\*|bar" and applied to the wildcard list
+# (plus end-of-string or commas on either side). The regex engine should
+# discard any duplicates generated for us in the process of constructing the
+# state machine, so we don't bother to check.
+my $target_regex = "(?:^|,) *(?:\\*";
+foreach my $target_part (split ':', $target) {
+    for (my $c=1; $c [... lost in extraction: the text between a "<" and a ">" was stripped here, presumably including the rest of this hunk and the start of the next one ...] $v\n";
-                $$strref = $string;
-                return $string;
-            }
-        }
+    if ($n =~ $target_regex) {
+        $string = $v;
+        $$strref = $string;
+        return $string;
     }
 }
-- 
1.7.3.1