--- db/prgsrc/db.cgi 2000/10/15 19:49:22 1.1 +++ db/prgsrc/db.cgi 2001/11/20 21:33:36 1.34 @@ -2,20 +2,43 @@ use DBI; use CGI ':all'; +use Text::Query; use strict; use Time::Local; use POSIX qw(locale_h); +use locale; +#open STDERR, ">errors"; +my $printqueries=1; +my $debug=0; #added by R7 +my %fieldname= (0,'Question', 1, 'Answer', 2, 'Comments', 3, 'Authors', 4, 'Sources'); +my %searchin; + + +my $thislocale; + + +$searchin{'question'}=param('Question'); +$searchin{'answer'}=param('Answer'); +$searchin{'comment'}=param('Comment'); +$searchin{'authors'}=param('Authors'); +$searchin{'sources'}=param('Sources'); +my $all=param('all'); +$all=0 if lc $all eq 'no'; my ($PWD) = `pwd`; chomp $PWD; my ($SRCPATH) = "$PWD/../dimrub/src"; -my ($ZIP) = "/usr/local/bin/zip"; +my ($ZIP) = "/home/piataev/bin/zip"; +my $DUMPFILE = "/tmp/chgkdump"; my ($SENDMAIL) = "/usr/sbin/sendmail"; my ($TMSECS) = 30*24*60*60; my (%RevMonths) = ('Jan', '0', 'Feb', '1', 'Mar', '2', 'Apr', '3', 'May', '4', 'Jun', '5', 'Jul', '6', 'Aug', '7', 'Sep', '8', 'Oct', '9', 'Nov', '10', - 'Dec', '11'); + 'Dec', '11', + 'Янв', '0', 'Фев', 1, 'Мар', 2, 'Апр', 3, 'Май', '4', + 'Июн', '5', 'Июл', 6, 'Авг', '7', 'Сен', '8', + 'Окт', '9', 'Ноя', '19', 'Дек', '11'); # Determine whether the given time is within 2 months from now. sub NewEnough { @@ -98,34 +121,297 @@ sub GetTours { return @Tours; } +sub russearch { + my ($dbh, $sstr, $all,$allnf)=@_; + my (@qw,@w,@tasks,$qw,@arr,$nf,$sth,@nf,$w,$where,$e,@where,%good,$i,%where,$from); + my($number,@good,$t,$task,@rho,$rank,%rank,$r2,$r1,$word,$n,@last,$good,@words,%number,$taskid); + my ($hi, $lo, $wordnumber,$query,$blob,$field,$sf,$ii); + my @frequence; + my (@arr1,@ar,@sf,@arr2); + my %tasks; + my $tasks; + my @verybad; + my %nf; + my %tasksof; + my %wordsof; + my %relevance; + my @blob; + my %count; + +$sstr=~tr/йцукенгшщзхъфывапролджэячсмитьбю/ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ/; + @qw=@w =split (' ', uc $sstr); + +#----------- + foreach $i (0..$#w) # заполняем массив @nf начальных форм + # $nf[$i] -- ссылка на массив возможных + # начальных форм словоформы $i + { + $qw= $dbh->quote (uc $w[$i]); + $query=" select distinct w2 from nests + where w1=$qw"; +print "$query",br if $printqueries; + $sth=$dbh -> prepare($query); + $sth -> execute; + @{$nf[$i]}=(); + while (@arr = $sth->fetchrow) + { + push (@{$nf[$i]},$arr[0]) + } + } + + + my @bad=grep {!$nf[$_]} 0..$#w; # @bad -- номера словоформ, + # которых нет в словаре + + if (@bad) #есть неопознанные словоформы + { + require "cw.pl"; + foreach $i(@bad) + { + if (@arr=checkword($dbh,$w[$i])) + {push (@{$nf[$i]}, @arr);} + else + {push (@verybad,$i);} + } + } + return () if ($all && @verybad); + + + my $kvo=0; + push @$allnf, @{$_} foreach @nf; +print "nf=@$allnf"; + + foreach $i (0..$#w) #запросы в базу... + { + @arr=@{$nf[$i]} if $nf[$i]; + @arr2=@arr1=@arr; + + + + + $_= " word2question.word=".$_. ' ' foreach @arr; + $_= " nf.id=".$_. ' ' foreach @arr1; + +# $_= " nests.w2=".$_. ' ' foreach @arr2; +# $query="select w1 from nests where". (join ' OR ', @arr2); +#print $query if $printqueries; +# $sth=$dbh -> prepare($query); +# $sth->execute; +# while (@ar=$sth->fetchrow) +# { +# $ar[0]=~s/(.)/&nocase($1)/ge; +# push(@sf,'(?:'.$ar[0].')'); +# } +# $selectshablon=join '|',@sf; + +#print $selectshablon,br if $printqueries; + +# $selectshablon=qr/$selectshablon/i; + + + + + $query="select questions from word2question where". (join ' OR ', @arr); +print "$query\n",br if $printqueries; + + $sth=$dbh -> prepare($query); + $sth->execute; + + @blob=(); + while (@arr=$sth->fetchrow) + { + @blob=(@blob,unpack 'C*',$arr[0]); + } + $query="select number from nf where ".(join ' OR ', @arr1); +print "$query\n",br if $printqueries; + $sth=$dbh -> prepare($query); + $sth->execute; + + while (@arr=$sth->fetchrow) + { + $frequence[$i]+=$arr[0]; + } + + + + if (@blob < 4) + { + $tasksof{$i}=undef; + } else + { + $kvo++; + $ii=0; + while ($ii<$#blob) # создаём хэш %tasksof, ключи которого -- + # номера искомых словоформ, а значения -- + # списки вопросов, в которых есть соответствующа + # словоформа. + # Каждый список в свою очередь также оформлен в + # виде хэша, ключи которого -- номера вопросов, + # а значения -- списки номеров вхождений. Вот. + { + ($field,$lo,$hi,$wordnumber)=@blob[$ii..($ii+3)]; + $ii+=4; + $number=$lo+$hi*256; + $field=$fieldname{$field}; + if ($searchin{lc $field}) + { + push @{$tasksof{$i}{$number}}, $wordnumber; + # дополнили в хэше, висящем на + # словоформе $i в %tasksof список + # вхождений $i в вопрос $number. + push @{$wordsof{$number}{$i}}, $wordnumber; + # дополнили в хэше, висящем на + # вопросе $number в %wordsof список + # вхождений $i в вопрос $number. + + + } + } #while ($ii<$#blob) + } + } #foreach $i + +#print "keys tasksof", join ' ', keys %{$tasksof{0}}; +#Ищем пересечение или объединение списков вопросов (значений %tasksof) + foreach $sf (keys %tasksof) + { + $count{$_}++ foreach keys %{$tasksof{$sf}}; + } + @tasks= ($all ? (grep {$count{$_}==$kvo} keys %count) : + keys %count) ; + + +print "\n\$#tasks=",$#tasks,br if $printqueries; +############ Сортировка найденных вопросов + +foreach (keys %wordsof) +{ + $relevance{$_}=&relevance($#w,$wordsof{$_},\@frequence) if $_ +} + +@tasks=sort {$relevance{$b}<=>$relevance{$a}} @tasks; + + +############ + +print "tasks=@tasks"; + +#print "$_ $relevance{$_} | " foreach @tasks; +#print br; +print "allnf=@$allnf",br if $printqueries; + return @tasks; +} + + +sub distance { + # на входе -- номера словоформ и ссылки на + # списки вхождений. На выходе -- расстояние, + # вычисляемое по формуле min(|b-a-pb+pa|) + # pb,pa + # (pb и pa -- позиции слов b и a) + my ($a,$b,$lista,$listb)=@_; + my ($pa,$pb,$min,$curmin); + $min=10000; + foreach $pa (@$lista) + { + foreach $pb (@$listb) + { + $curmin=abs($b-$a-$pb+$pa); + $min= $curmin if $curmin<$min; + } + } + return $min; + +} + +sub relevance { + # На входе -- количество искомых словоформ -1 и + # ссылка на hash, ключи которого -- + # номера словоформ, а значения -- списки вхождений + + my ($n,$words,$frequence)=@_; + my $relevance=0; + my ($first,$second,$d); + foreach $first (0..$n) + { + $relevance+=scalar @{$$words{$first}}+1000+1000/$$frequence[$first] +if $$words{$first}; + foreach $second ($first+1..$n) + { + $d=&distance($first,$second,$$words{$first},$$words{$second}); + $relevance+=($d>10?0:10-$d)*10; + } + } + return $relevance; +} + + # Returns list of QuestionId's, that have the search string in them. sub Search { - my ($dbh, $sstr) = @_; + my ($dbh, $sstr,$metod,$all,$allnf) = @_; my (@arr, @Questions, @fields); - my (@sar, $i, $sth); + my (@sar, $i, $sth,$where); - push @fields, 'Question'; - foreach (qw/Answer Sources Authors Comments/) { +# push @fields, 'Question'; + + if ($metod eq 'rus') + { + my @tasks=russearch($dbh,$sstr,$all,$allnf); + return @tasks + } + + +###Simple and advanced query processing. Added by R7 + if ($metod eq 'simple' || $metod eq 'advanced') + { + foreach (qw/Question Answer Sources Authors Comments/) { if (param($_)) { - push @fields, "IFNULL($_, '')"; + push @fields, $_; } - } + } - @sar = split " ", $sstr; - for $i (0 .. $#sar) { + @fields=(qw/Question Answer Sources Authors Comments/) unless scalar @fields; + my $fields=join ",", @fields; + my $q=new Text::Query($sstr, + -parse => 'Text::Query::'. + (($metod eq 'simple') ? 'ParseSimple':'ParseAdvanced'), + -solve => 'Text::Query::SolveSQL', + -build => 'Text::Query::BuildSQLMySQL', + -fields_searched => $fields); + + $where= $$q{'matchexp'}; + my $query= "SELECT Questionid FROM Questions + WHERE $where"; + print br."Query is: $query".br if $debug; + + $sth = $dbh->prepare($query); + } else +###### + { + + foreach (qw/Question Answer Sources Authors Comments/) { + if (param($_)) { + push @fields, "IFNULL($_, '')"; + } + } + @sar = split " ", $sstr; + for $i (0 .. $#sar) { $sar[$i] = $dbh->quote("%${sar[$i]}%"); - } + } - my($f) = "CONCAT(" . join(',', @fields) . ")"; - if (param('all') eq 'yes') { + my($f) = "CONCAT(" . join(',', @fields) . ")"; + if (param('all') eq 'yes') { $sstr = join " AND $f LIKE ", @sar; - } else { + } else { $sstr = join " OR $f LIKE ", @sar; - } - - $sth = $dbh->prepare("SELECT QuestionId FROM Questions - WHERE $f LIKE $sstr ORDER BY QuestionId"); + } + +my $query="SELECT QuestionId FROM Questions + WHERE $f LIKE $sstr ORDER BY QuestionId"; +print $query if $printqueries; + $sth = $dbh->prepare($query) + + } #else -- processing old-style query (R7) $sth->execute; while (@arr = $sth->fetchrow) { @@ -151,10 +437,36 @@ sub NoCase { } sub PrintSearch { - my ($dbh, $sstr) = @_; - my (@Questions) = &Search($dbh, $sstr); + my ($dbh, $sstr, $metod) = @_; + my @allnf; + my (@Questions) = &Search($dbh, $sstr,$metod,$all,\@allnf); my ($output, $i, $suffix, $hits) = ('', 0, '', $#Questions + 1); + my $shablon; + + if ($metod eq 'rus') + { + my $where='0'; + $where.= " or w2=$_ " foreach @allnf; + my $query="select w1 from nests where $where"; + my $sth=$dbh->prepare($query); +print "$query" if $printqueries; + + $sth->execute; + my @shablon; + while (my @arr = $sth->fetchrow) + { + push @shablon,"(?:$arr[0])"; + } + $shablon= join "|", @shablon; + $shablon=~s/[её]/\[ЕЁ\]/gi; +# $shablon=~s/([йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ])/&NoCase($1)/ge; + $shablon=qr/$shablon/i; + print "!$shablon!",br if $printqueries; + } + + + if ($hits =~ /1.$/ || $hits =~ /[5-90]$/) { $suffix = 'й'; } elsif ($hits =~ /1$/) { @@ -172,12 +484,16 @@ sub PrintSearch { $sstr =~ s/(.)/&NoCase($1)/ge; - my(@sar) = split(/\s/, $sstr); + my(@sar) = split(' ', $sstr); for ($i = 0; $i <= $#Questions; $i++) { $output = &PrintQuestion($dbh, $Questions[$i], 1, $i + 1, 1); + if (param('metod') eq 'rus') + { + $output=~s/\b($shablon)\b/\$1\<\/strong\>/gi; + } else { foreach (@sar) { $output =~ s/$_/$&<\/strong>/gs; - } + }} print $output; } } @@ -221,15 +537,21 @@ sub PrintTournament { for ($Tournament{'Type'}) { /Г/ && do { $output .= h2({align=>"center"}, - "Группа: $Tournament{'Title'}") . p . "\n"; + "Группа: $Tournament{'Title'} ", + "$Tournament{'PlayedAt'}") . p . "\n"; last; }; /Ч/ && do { return &PrintTour($dbh, $Tours[0], $answer) if ($#Tours == 0); + + my $title="Пакет: $Tournament{'Title'}"; + if ($Tournament{'PlayedAt'}) { + $title .= " $Tournament{'PlayedAt'}"; + } $output .= h2({align=>"center"}, - "Чемпионат: $Tournament{'Title'}") . p . "\n"; + "$title") . p . "\n"; last; }; /Т/ && do { @@ -266,7 +588,8 @@ sub PrintTournament { if ($SingleTour or $Tournament{'Type'} =~ /Т/) { $list .= dd(img({src=>$imgsrc, alt=>$alt}) - . " " . $Tournament{'Title'} . $qnum) . + . " " . $Tournament{'Title'} . " " . + $Tournament{'PlayedAt'} . $qnum) . dl( dd("[" . a({href=>url . "?tour=$Tournament{'Id'}&answer=0"}, @@ -279,7 +602,8 @@ sub PrintTournament { img({src=>'/icons/compressed.gif', alt=>'[ZIP]', border=>1})) . " " . img({src=>$imgsrc, alt=>$alt}) . " " . a({href=>url . "?tour=$Tournament{'Id'}&answer=0"}, - $Tournament{'Title'}) . $qnum); + $Tournament{'Title'}. " ". + $Tournament{'PlayedAt'}) . $qnum); } } $output .= dl($list); @@ -333,7 +657,9 @@ sub PrintTour { my ($qnum) = $Tour{'QuestionsNum'}; my ($suffix) = &Suffix($qnum); - $output .= h2({align=>"center"}, $Tour{"Title"} . + $output .= h2({align=>"center"}, $Tournament{"Title"}, + $Tournament{'PlayedAt'}, + "
", $Tour{"Title"} . " ($qnum вопрос$suffix)\n") . p; my (@Questions) = &GetTourQuestions($dbh, $Id); @@ -388,8 +714,12 @@ sub PrintTour { sub PrintField { my ($header, $value, $text) = @_; if ($text) { - $value =~ s/<[\/\w]*>//sg; + $value =~ s/<[\/\w]*>//sg; + } else { + $value =~ s/^\s+/
    /mg; + $value =~ s/^\|([^\n]*)/
$1<\/pre>/mg;
 	}
+
 	return $text ? "$header:\n$value\n\n" : 
 		strong("$header: ") . $value . p . "\n";
 }
@@ -408,7 +738,7 @@ sub PrintQuestion {
 			my (%Tournament) = GetTournament($dbh, $Tour{'ParentId'});
 			$titles .=
 				dd(img({src=>"/icons/folder.open.gif"}) . " " .
-					 a({href=>url . "?tour=$Tournament{'Id'}"}, $Tournament{'Title'}));
+					 a({href=>url . "?tour=$Tournament{'Id'}"}, $Tournament{'Title'}, $Tournament{'PlayedAt'}));
 			$titles .=
 				dl(dd(img({src=>"/icons/folder.open.gif"}) . " " .
 					a({href=>url . "?tour=$Tour{'Id'}"}, $Tour{'Title'})));
@@ -427,7 +757,32 @@ sub PrintQuestion {
 			&PrintField("Ответ", $Question{'Answer'}, $text);
 
 		if ($Question{'Authors'}) {
-			$output .= &PrintField("Автор(ы)", $Question{'Authors'}, $text);
+                      my $q=$Question{'Authors'};
+
+		      my $sth=$dbh->prepare("select Authors.Id,Name, Surname, Nicks from Authors, A2Q
+                                 where Authors.Id=Author And Question=$Id");
+                      $sth->execute;
+                      my ($AuthorId,$Name, $Surname,$other,$Nicks);
+
+                      while ((($AuthorId,$Name, $Surname,$Nicks)=$sth->fetchrow),$AuthorId)
+                      {
+                        my ($firstletter)=$Name=~m/^./g;
+#		        $other.=a({href=>url."?qofauthor=$AuthorId"},"$Name $Surname").". ";
+                          my $sha="(?:$Name\\s+$Surname)|(?:$Surname\\s+$Name)|(?:$firstletter\\.\\s*$Surname)|(?:$Surname\\s+$firstletter\\.)|(?:$Surname)|(?:$Name)";
+                          $Nicks=~s/^\|//;
+                          foreach (split /\|/, $Nicks)
+                          {
+                              s/ /\\s+/;
+                              if (s/>$//) {$sha="$sha|(?:$_)"}
+                              else        {$sha="(?:$_)|$sha"}
+                          }
+#$output.=br."sha=$sha".br;
+                          $q=~s/($sha)/a({href=>url."?qofauthor=$AuthorId"},$1)/ei;
+                      }
+
+			$output .= &PrintField("Автор(ы)", $q, $text);
+
+#                        $output.= &PrintField("Другие вопросы", $other);
 		}
 
 		if ($Question{'Sources'}) {
@@ -448,34 +803,40 @@ sub GetQNum {
 	$sth->execute;
  	return ($sth->fetchrow)[0];
 }
+sub GetMaxQId {
+	my ($dbh) = @_;
+	my ($sth) = $dbh->prepare("SELECT MAX(QuestionId) FROM Questions");
+	$sth->execute;
+ 	return ($sth->fetchrow)[0];
+}
 
 # Returns Id's of 12 random questions
 sub Get12Random {
    my ($dbh, $type, $num) = @_;
 	my ($i, @questions, $q, $t, $sth);
-	my ($qnum) = &GetQNum($dbh);
+	my ($qnum) = &GetMaxQId($dbh);
 	my (%chosen);
 	srand;
 	
-	for ($i = 0; $i < $num; $i++) {
-		do {
-			$q = int(rand($qnum));
-			$sth = $dbh->prepare("SELECT Type FROM Questions
+   for ($i = 0; $i < $num; $i++) {
+       do {
+	   $q = int(rand($qnum));
+	   $sth = $dbh->prepare("SELECT Type FROM Questions
 				WHERE QuestionId=$q");
-			$sth->execute;
-			$t = ($sth->fetchrow)[0];
-		} until !$chosen{$q} && $t =~ /$type/;
-		$chosen{$q} = 'y';
-		push @questions, $q;
-	}
-	return @questions;
+	   $sth->execute;
+	   $t = ($sth->fetchrow)[0];
+       } until !$chosen{$q} && $t && $type =~ /[$t]/;
+       $chosen{$q} = 'y';
+       push @questions, $q;
+   }
+   return @questions;
 }
 
 sub Include_virtual {
 	my ($fn, $output) = (@_, '');
 
 	open F , $fn
-		or die "Can't open the file $fn: $!\n";
+		or return; #die "Can't open the file $fn: $!\n";
 	
 	while () {
 		if (/