File:  [Local Repository] / db / prgsrc / updateRS.pl
Revision 1.3: download - view: text, annotated - select for diffs - revision graph
Thu Nov 1 01:45:00 2001 UTC (22 years, 6 months ago) by boris
Branches: MAIN
CVS tags: HEAD
added files to makefile. Added use lib "../lib";

    1: #!/usr/bin/perl -w
    2: 
    3: =head1 NAME
    4: 
    5: updateRS.pl - Скрипт для занесения в таблицы русского поиска базы 
    6: B<$base> информации о вопросах 
    7: 
    8: =head1 SYNOPSIS
    9: 
   10: updateRS.pl QuestionNumber
   11: 
   12: updateRS.pl 
   13: 
   14: 
   15: =head1 DESCRIPTION
   16: 
   17: Скрипт ищет в таблице Questions вопросы с нулевым ProcessedBySearch,
   18: добавляет информацию в таблицы word2question, nests, nf. Поле 
   19: ProcessedBySearch устанавливается в 1. Обрабатывает QuestionNumber
   20: вопросов. Если параметр QuestionNumber не указан, работает пока не 
   21: обработает все вопросы.
   22: 
   23: 
   24: 
   25: =head1 AUTHOR
   26: 
   27: Роман Семизаров
   28: 
   29: 
   30: =cut
   31: 
   32: 
   33: use locale;
   34: use DBI;
   35: use POSIX qw (locale_h);
   36: use lib "../lib";
   37: use chgkfiles;
   38: use dbchgk;
   39: 
   40: 
   41: do "common.pl";
   42: do "chgk.cnf";
   43: require "check.pl";
   44: 
   45: open (STDERR,">$stderr") if $stderr;
   46: 
   47: 
   48: open (UNKNOWN,">$unknown");
   49: 
   50: $qlimit=shift||500000;
   51: 
   52:           
   53: my $nf;
   54: 
   55: #open WARN, ">$warnings";
   56: 
   57: %forbidden=checktable('equalto')? getequalto : ();
   58: 
   59: 
   60: if ((uc 'а') ne 'А') {die "!Koi8-r locale not installed!\n"};
   61: 
   62: getquestions(QuestionId, Question, Answer, Comments, Authors, Sources,"ProcessedBySearch IS NULL");
   63: 
   64: 
   65: 
   66: print "Loading dictionaries\n";
   67: 
   68: die "No dictionaries! Check your chgk.cnf" unless scalar @dictionaries;
   69: 
   70: foreach $d(@dictionaries)
   71: {
   72:      print "Loading $d\n";
   73:      open (DICT, $d) || print "              Not found\n";
   74:      while ( <DICT> )
   75:      {
   76:      	chomp;
   77:      	s/\s*$//;
   78:        	($aa,$b)=split(/\//);
   79:     	$a= uc $aa;
   80:     	$words{$a}.=$b || "!";
   81:      }
   82:      close(DICT);
   83: }
   84: 
   85: die "No dictionaries found! Check your chgk.cnf" unless scalar keys %words;
   86: 
   87: 
   88: print "Getting words...\n";
   89: 
   90: 
   91: $sch=0;
   92: while ((++$sch<=$qlimit) && (($id, @ss) = getrow, $id)) 
   93: {
   94:    if ($forbidden{$id}) {next};
   95:    print "\n$id ";
   96:    foreach $fieldnumber (0..$#ss) #перебираем поля
   97:    {
   98:       $text=$ss[$fieldnumber];
   99:       next unless $text;
  100:       $text=~tr/ёЁ/еЕ/;
  101:       $text=~s/(${RLrl})p(${RLrl})/$1p$2/gom;
  102:       $text=~s/p(${RLrl})/р$1/gom;
  103:       $text=~s/(${RLrl})p/$1р/gom;
  104:       $text=~s/\s+/ /gmo;
  105:       @list= $text=~m/(?:(?:${RLrl})+)|(?:[A-Za-z0-9]+)/gom;
  106: 
  107:       foreach $wordnumber(0..$#list)
  108:       {
  109:            $word=$list[$wordnumber];
  110: 
  111:            if (@n=knownword(uc $word)) 
  112:            {
  113:                 incnf($_) foreach @n;
  114:                 updateword2question($_,packword($fieldnumber, $id,$wordnumber),1)
  115:                      foreach (@n);
  116: print ".";
  117:            }
  118:            else {
  119:              if ($word=~/^${RLrl}+$/o){ # Русское слово
  120:             # проанализировать по таблице аффиксов, 
  121:             # проверить наличие начальных форм в 
  122:             # nf, а если таких нет, то 
  123:             # и по словарю. 
  124:               
  125:                $nf=&checkit(uc $word,\%words);
  126: print "!";
  127:                if (!$nf) {
  128:                            $nf=(uc $word)."/!";
  129:                            print UNKNOWN "$nf\n" if $unknown;
  130:                          }
  131: 
  132:               }  else {# нерусское слово
  133:                          $nf=(uc $word)."/!";
  134:                          print UNKNOWN "$nf\n" if $unknown;
  135:                       }
  136: 
  137:                foreach $n (split ' ', $nf)
  138:                {
  139:                    ($f,$flag)=split '/', $n;
  140:                    if ($nfnumber=knownnf($f))
  141:                    {
  142:                        addnest(uc $word,$nfnumber);
  143:                        incnf($nfnumber);
  144:                        updateword2question($nfnumber,packword($fieldnumber,
  145:                           $id,$wordnumber),1)           
  146:                    }
  147:                    else 
  148:                    {
  149:                       $nfnumber=addnf(0, $f, $flag,1);
  150:                       addnest(uc $word,$nfnumber);
  151:                       updateword2question($nfnumber,packword($fieldnumber,
  152:                           $id,$wordnumber),0)           
  153:                    }
  154:                }
  155:            }
  156: 
  157:       }
  158:    }
  159:    searchmark($id);
  160: }
  161: 
  162: 
  163: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>