File:  [Local Repository] / db / prgsrc / makeauthors.pl
Revision 1.10: download - view: text, annotated - select for diffs - revision graph
Fri Sep 24 16:58:57 2010 UTC (13 years, 7 months ago) by roma7
Branches: MAIN
CVS tags: HEAD
authors fix

    1: #!/usr/bin/perl -w
    2: 
    3: =head1 NAME
    4: 
    5: makeauthors.pl - скрипт для создания таблиц авторов
    6: 
    7: =head1 SYNOPSIS
    8: 
    9: makeauthors.pl
   10: 
   11: =head1 DESCRIPTION
   12: 
   13: Скрипт создаёт и заполянет таблицы authors и A2Q, используя 
   14: информацию из файлов authors,nicks,ssnicks
   15: 
   16: =head1 AUTHOR
   17: 
   18: Роман Семизаров
   19: 
   20: 
   21: =cut
   22: 
   23: 
   24: use dbchgk;
   25: 
   26: my $DUMPDIR = $ENV{DUMPDIR} || "../dump";
   27: 
   28: do "chgk.cnf";
   29: use locale;
   30: use POSIX qw (locale_h);
   31: open NICKS, "<$nicksfile" or die "Can not open nicks";
   32: open SSNICKS, "<$ssnicksfile" or die "Can not open ssnicks";
   33: open UNKNOWN, ">$DUMPDIR/uauthors";
   34: open UNICKS, ">$DUMPDIR/unicks";
   35: open STDERR, ">$DUMPDIR/errors";
   36:   my ($thislocale);
   37:   if ($^O =~ /win/i) {
   38: 	$thislocale = "Russian_Russia.20866";
   39:   } else {
   40: 	$thislocale = "ru_RU.KOI8-R"; 
   41:   }
   42:   POSIX::setlocale( &POSIX::LC_ALL, $thislocale );
   43:   if ((uc 'а') ne 'А') {die "!Koi8-r locale not installed!\n"};
   44: 
   45: 
   46: 
   47: while (<NICKS>)
   48: {
   49:    ($number,$nick)=split;   
   50:    next unless $number;   
   51:    next unless $number=~/^\d+$/;
   52:    @parts = split ' ',<NICKS>;
   53:    $_ = ucfirst lc $_ foreach  @parts;
   54:    $surname = pop @parts;
   55:    my $name;
   56:    $name = $name{$nick}= join ' ', @parts;
   57:    $surname=~s/\-(.)/"-". uc $1/ge;
   58:    $surname=~s/\'(.)/"'". uc $1/ge;
   59:    $surname{$nick}= $surname;
   60:    $sn = "$name $surname";
   61: print "$name!$surname\n";
   62:    $sn =~ tr/Ёё/Ее/;
   63:    $nickfromname{uc $sn} = $nick;   
   64: }
   65: 
   66: $surname{'error'}='Глюков';
   67: $name{'error'}='Очепят';
   68: $surname{'unknown'}='Неизвестный';
   69: $name{'unknown'}='Псевдоним';
   70: $surname{'team'}='Авторов';
   71: $name{'team'}='Коллектив';
   72: 
   73: 
   74: while (<SSNICKS>)
   75: {
   76:    $str=$_;
   77:    ($number,$n)=split ' ',$str;
   78:    if ($number=~/\d+/) {$nick=$n;next}
   79:    $str=~s/^\s+//;
   80:    $str=~s/\s+$//;   
   81:    $str=~s/\s+/ /;
   82:    $ssnick{$nick}.="|$str";
   83: }
   84: 
   85: close (NICKS);
   86: close (SSNICKS);
   87: 
   88: open AUTHORS,"<$authorsfile" or die "Can not open authors";
   89: print "REading authors...\n";
   90: while (<AUTHORS>)
   91: {
   92: 
   93:    ($nick,$number,$descr)=m/^([a-zA-Z][a-zA-Z\s]+)(\d+)\s+(.*)$/g;
   94:    if (!$nick) 
   95:    {
   96:       ($number,$descr)=m/^(\d+)\s+(.*)$/g;
   97:       $nick='unknown';
   98:    }
   99: #   if ($nick=~s/\s*$//)
  100:    $descr=~s/([\.\,\:\!\?])/$1 /g;
  101:    $descr=~s/\\n/ /g;
  102:    $descr=~s/^\s+//g;
  103:    $descr=~s/\s+$//g;
  104:    $descr=~s/\s+/ /g;
  105:    $descr=uc $descr;
  106: # die "$descr" unless $descr;
  107: #   die "Duplicated description \"$descr\"" if ($nick{$descr});
  108:    $nick{$descr}=$nick;
  109:    foreach (split ' ', $nick)
  110:    {
  111:       $unknick{$_}=1  unless $name{$_}
  112:    }
  113: }
  114: 
  115: print "printing unknown...\n";
  116: foreach $as(keys %unknick)
  117: {
  118:     print UNICKS "$as \n ", (join "\n ", (grep {$nick{$_}=~/$as/} keys %nick));
  119:     print UNICKS "\n";
  120: }
  121: 
  122: %forbidden=tableexists('equalto')? getequalto : ();
  123: 
  124: #print scalar keys %forbidden, "forbidden questions\n";
  125: 
  126: getbase('QuestionId','Authors');
  127: while (($QuestionId, $author)=getrow,$QuestionId)
  128: {
  129:    print "." unless $i++ % 100;
  130:    next unless $author;
  131:    $author=~s/([\.\,\:\!\?])/$1 /gm;
  132:    $author=~s/^\s+//mg;
  133:    $author=~s/\\n/ /g;
  134:    $author=~s/\s+$//mg;
  135:    $author=~s/\s+/ /mg;
  136:    $author=uc $author;
  137:    $author=~s/ё/е/mg;
  138:    if ($nick = $nick{$author}) 
  139:    { 
  140:       my @a=split ' ',$nick;
  141:  foreach $tmp(@a) {
  142:    if ($tmp eq '!!!') {
  143:      print STDERR "!$author!".$QuestionId."\n";
  144:    }
  145:  }
  146:       push @{$questions{$_}},$QuestionId foreach @a;
  147:    }
  148:    else 
  149:    {
  150:       $unknown{$author}=1;
  151:    }
  152: }
  153: 
  154: 
  155: 
  156: print scalar keys %nick , " authors found\n";
  157: 
  158: 
  159: #print STDERR "$_ ".$name{$_}."!\n" foreach keys %name;
  160: addquestions2author($_,$name{$_},$surname{$_},$questions{$_},$ssnick{$_},\%forbidden) foreach keys %questions;
  161: 
  162: print UNKNOWN "$_\n" foreach sort keys %unknown;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>