--- db/prgsrc/db.cgi 2000/10/19 22:34:56 1.4 +++ db/prgsrc/db.cgi 2001/10/22 20:53:53 1.27 @@ -1,15 +1,35 @@ -#!/usr/bin/perl -w +#!/usr/local/bin/perl -w use DBI; use CGI ':all'; +use Text::Query; use strict; use Time::Local; use POSIX qw(locale_h); +use locale; +open STDERR, ">errors"; +my $printqueries=1; +my %fieldname= (0,'Question', 1, 'Answer', 2, 'Comments', 3, 'Authors', 4, 'Sources'); +my %searchin; + + +my $thislocale; + + +$searchin{'question'}=param('Question'); +$searchin{'answer'}=param('Answer'); +$searchin{'comment'}=param('Comment'); +$searchin{'authors'}=param('Authors'); +$searchin{'sources'}=param('Sources'); +$printqueries||=param('debug'); +my $all=param('all'); +$all=0 if lc $all eq 'no'; my ($PWD) = `pwd`; chomp $PWD; my ($SRCPATH) = "$PWD/../dimrub/src"; -my ($ZIP) = "/usr/bin/zip"; +my ($ZIP) = "/home/piataev/bin/zip"; +my $DUMPFILE = "/tmp/chgkdump"; my ($SENDMAIL) = "/usr/sbin/sendmail"; my ($TMSECS) = 30*24*60*60; my (%RevMonths) = @@ -101,34 +121,300 @@ sub GetTours { return @Tours; } +sub russearch { + my ($dbh, $sstr, $all,$allnf)=@_; + my (@qw,@w,@tasks,$qw,@arr,$nf,$sth,@nf,$w,$where,$e,@where,%good,$i,%where,$from); + my($number,@good,$t,$task,@rho,$rank,%rank,$r2,$r1,$word,$n,@last,$good,@words,%number,$taskid); + my ($hi, $lo, $wordnumber,$query,$blob,$field,$sf,$ii); + my @frequence; + my (@arr1,@ar,@sf,@arr2); + my %tasks; + my $tasks; + my @verybad; + my %nf; + my %tasksof; + my %wordsof; + my %relevance; + my @blob; + my %count; + +$sstr=~tr/йцукенгшщзхъфывапролджэячсмитьбю/ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ/; + @qw=@w =split (' ', uc $sstr); + +#----------- + foreach $i (0..$#w) # заполняем массив @nf начальных форм + # $nf[$i] -- ссылка на массив возможных + # начальных форм словоформы $i + { + $qw= $dbh->quote (uc $w[$i]); + $query=" select distinct w2 from nests + where w1=$qw"; +print "$query",br if $printqueries; + $sth=$dbh -> prepare($query); + $sth -> execute; + @{$nf[$i]}=(); + while (@arr = $sth->fetchrow) + { + 
push (@{$nf[$i]},$arr[0]) + } + } + + my @bad=grep {!$nf[$_]} 0..$#w; # @bad -- номера словоформ, + # которых нет в словаре + + if (@bad) #есть неопознанные словоформы + { + require "cw.pl"; + foreach $i(@bad) + { + if (@arr=checkword($dbh,$w[$i])) + {push (@{$nf[$i]}, @arr);} + else + {push (@verybad,$i);} + } + } + return () if ($all && @verybad); + + my $kvo=0; + push @$allnf, @{$_} foreach @nf; +print "allnf=@$allnf\n".br if $printqueries; + + foreach $i (0..$#w) #запросы в базу... + { + @arr=@{$nf[$i]} if $nf[$i]; + @arr2=@arr1=@arr; + + + + + $_= " word2question.word=".$_. ' ' foreach @arr; + $_= " nf.id=".$_. ' ' foreach @arr1; + +# $_= " nests.w2=".$_. ' ' foreach @arr2; +# $query="select w1 from nests where". (join ' OR ', @arr2); +#print $query if $printqueries; +# $sth=$dbh -> prepare($query); +# $sth->execute; +# while (@ar=$sth->fetchrow) +# { +# $ar[0]=~s/(.)/&nocase($1)/ge; +# push(@sf,'(?:'.$ar[0].')'); +# } +# $selectshablon=join '|',@sf; + +#print $selectshablon,br if $printqueries; + +# $selectshablon=qr/$selectshablon/i; + + + + + $query="select questions from word2question where". (join ' OR ', @arr); +print "$query\n",br if $printqueries; + + $sth=$dbh -> prepare($query); + $sth->execute; + + @blob=(); + while (@arr=$sth->fetchrow) + { + @blob=(@blob,unpack 'C*',$arr[0]); + } + $query="select number from nf where ".(join ' OR ', @arr1); +print "$query\n",br if $printqueries; + $sth=$dbh -> prepare($query); + $sth->execute; + + while (@arr=$sth->fetchrow) + { + $frequence[$i]+=$arr[0]; + } + + + + + if (@blob < 4) + { + $tasksof{$i}=undef; + } else + { + $kvo++; + $ii=0; + while ($ii<$#blob) # создаём хэш %tasksof, ключи которого -- + # номера искомых словоформ, а значения -- + # списки вопросов, в которых есть соответствующа + # словоформа. + # Каждый список в свою очередь также оформлен в + # виде хэша, ключи которого -- номера вопросов, + # а значения -- списки номеров вхождений. Вот. 
+ { + ($field,$lo,$hi,$wordnumber)=@blob[$ii..($ii+3)]; + $ii+=4; + $number=$lo+$hi*256; + $field=$fieldname{$field}; + if ($searchin{lc $field}) + { + + push @{$tasksof{$i}{$number}}, $wordnumber; + # дополнили в хэше, висящем на + # словоформе $i в %tasksof список + # вхождений $i в вопрос $number. + push @{$wordsof{$number}{$i}}, $wordnumber; + # дополнили в хэше, висящем на + # вопросе $number в %wordsof список + # вхождений $i в вопрос $number. + + + } + } #while ($ii<$#blob) + } + } #foreach $i + +print "keys tasksof", keys %tasksof if $printqueries; +#Ищем пересечение или объединение списков вопросов (значений %tasksof) + foreach $sf (keys %tasksof) + { + $count{$_}++ foreach keys %{$tasksof{$sf}}; + } + @tasks= ($all ? (grep {$count{$_}==$kvo} keys %count) : + keys %count) ; + + +#print "\n\$#tasks=",$#tasks,br; +############ Сортировка найденных вопросов + +foreach (keys %wordsof) +{ + $relevance{$_}=&relevance($#w,$wordsof{$_},\@frequence) if $_ +} + +@tasks=sort {$relevance{$b}<=>$relevance{$a}} @tasks; + + +############ + +print "tasks=@tasks" if $printqueries;; + +#print "$_ $relevance{$_} | " foreach @tasks; +#print br; +print "allnf=@$allnf",br if $printqueries; + return @tasks; +} + + +sub distance { + # на входе -- номера словоформ и ссылки на + # списки вхождений. 
На выходе -- расстояние, + # вычисляемое по формуле min(|b-a-pb+pa|) + # pb,pa + # (pb и pa -- позиции слов b и a) + my ($a,$b,$lista,$listb)=@_; + my ($pa,$pb,$min,$curmin); + $min=10000; + foreach $pa (@$lista) + { + foreach $pb (@$listb) + { + $curmin=abs($b-$a-$pb+$pa); + $min= $curmin if $curmin<$min; + } + } + return $min; + +} + +sub relevance { + # На входе -- количество искомых словоформ -1 и + # ссылка на hash, ключи которого -- + # номера словоформ, а значения -- списки вхождений + + my ($n,$words,$frequence)=@_; + my $relevance=0; + my ($first,$second,$d); + foreach $first (0..$n) + { + $relevance+=scalar @{$$words{$first}}+1000+1000/$$frequence[$first] +if $$words{$first}; + foreach $second ($first+1..$n) + { + $d=&distance($first,$second,$$words{$first},$$words{$second}); + $relevance+=($d>10?0:10-$d)*10; + } + } + return $relevance; +} + + # Returns list of QuestionId's, that have the search string in them. sub Search { - my ($dbh, $sstr) = @_; + my ($dbh, $sstr,$metod,$all,$allnf) = @_; my (@arr, @Questions, @fields); - my (@sar, $i, $sth); + my (@sar, $i, $sth,$where); + +# push @fields, 'Question'; - push @fields, 'Question'; - foreach (qw/Answer Sources Authors Comments/) { + if ($metod eq 'rus') + { + my @tasks=russearch($dbh,$sstr,$all,$allnf); + return @tasks + } + + +###Simple and advanced query processing. Added by R7 + if ($metod eq 'simple' || $metod eq 'advanced') + { + foreach (qw/Question Answer Sources Authors Comments/) { if (param($_)) { - push @fields, "IFNULL($_, '')"; + push @fields, $_; } } - @sar = split " ", $sstr; - for $i (0 .. $#sar) { + + @fields=(qw/Question Answer Sources Authors Comments/) unless scalar @fields; + my $fields=join ",", @fields; + my $q=new Text::Query($sstr, + -parse => 'Text::Query::'. + (($metod eq 'simple') ? 
'ParseSimple':'ParseAdvanced'), + -solve => 'Text::Query::SolveSQL', + -build => 'Text::Query::BuildSQLMySQL', + -fields_searched => $fields); + + $where= $$q{'matchexp'}; + my $query= "SELECT Questionid FROM Questions + WHERE $where"; + print br."Query is: $query".br if $printqueries; + + $sth = $dbh->prepare($query); + } else +###### + { + + + foreach (qw/Question Answer Sources Authors Comments/) { + if (param($_)) { + push @fields, "IFNULL($_, '')"; + } + } + + @sar = split " ", $sstr; + for $i (0 .. $#sar) { $sar[$i] = $dbh->quote("%${sar[$i]}%"); - } + } - my($f) = "CONCAT(" . join(',', @fields) . ")"; - if (param('all') eq 'yes') { + my($f) = "CONCAT(" . join(',', @fields) . ")"; + if (param('all') eq 'yes') { $sstr = join " AND $f LIKE ", @sar; - } else { + } else { $sstr = join " OR $f LIKE ", @sar; - } - - $sth = $dbh->prepare("SELECT QuestionId FROM Questions - WHERE $f LIKE $sstr ORDER BY QuestionId"); + } + +my $query="SELECT QuestionId FROM Questions + WHERE $f LIKE $sstr ORDER BY QuestionId"; +print $query if $printqueries; + $sth = $dbh->prepare($query) + + } #else -- processing old-style query (R7) $sth->execute; while (@arr = $sth->fetchrow) { @@ -154,10 +440,37 @@ sub NoCase { } sub PrintSearch { - my ($dbh, $sstr) = @_; - my (@Questions) = &Search($dbh, $sstr); + my ($dbh, $sstr, $metod) = @_; + my @allnf; + my (@Questions) = &Search($dbh, $sstr,$metod,$all,\@allnf); my ($output, $i, $suffix, $hits) = ('', 0, '', $#Questions + 1); + my $shablon; + + + if ($metod eq 'rus') + { + my $where='0'; + $where.= " or w2=$_ " foreach @allnf; + my $query="select w1 from nests where $where"; + my $sth=$dbh->prepare($query); +print "$query" if $printqueries; + + $sth->execute; + my @shablon; + while (my @arr = $sth->fetchrow) + { + push @shablon,"(?:$arr[0])"; + } + $shablon= join "|", @shablon; + $shablon=~s/[её]/\[ЕЁ\]/gi; +# $shablon=~s/([йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ])/&NoCase($1)/ge; + $shablon=qr/$shablon/i; + print 
"!$shablon!",br if $printqueries; + } + + + if ($hits =~ /1.$/ || $hits =~ /[5-90]$/) { $suffix = 'й'; } elsif ($hits =~ /1$/) { @@ -175,12 +488,16 @@ sub PrintSearch { $sstr =~ s/(.)/&NoCase($1)/ge; - my(@sar) = split(/\s/, $sstr); + my(@sar) = split(' ', $sstr); for ($i = 0; $i <= $#Questions; $i++) { $output = &PrintQuestion($dbh, $Questions[$i], 1, $i + 1, 1); + if (param('metod') eq 'rus') + { + $output=~s/\b($shablon)\b/\$1\<\/strong\>/gi; + } else { foreach (@sar) { $output =~ s/$_/$&<\/strong>/gs; - } + }} print $output; } } @@ -224,15 +541,21 @@ sub PrintTournament { for ($Tournament{'Type'}) { /Г/ && do { $output .= h2({align=>"center"}, - "Группа: $Tournament{'Title'}") . p . "\n"; + "Группа: $Tournament{'Title'} ", + "$Tournament{'PlayedAt'}") . p . "\n"; last; }; /Ч/ && do { return &PrintTour($dbh, $Tours[0], $answer) if ($#Tours == 0); + + my $title="Пакет: $Tournament{'Title'}"; + if ($Tournament{'PlayedAt'}) { + $title .= " $Tournament{'PlayedAt'}"; + } $output .= h2({align=>"center"}, - "Чемпионат: $Tournament{'Title'}") . p . "\n"; + "$title") . p . "\n"; last; }; /Т/ && do { @@ -269,7 +592,8 @@ sub PrintTournament { if ($SingleTour or $Tournament{'Type'} =~ /Т/) { $list .= dd(img({src=>$imgsrc, alt=>$alt}) - . " " . $Tournament{'Title'} . $qnum) . + . " " . $Tournament{'Title'} . " " . + $Tournament{'PlayedAt'} . $qnum) . dl( dd("[" . a({href=>url . "?tour=$Tournament{'Id'}&answer=0"}, @@ -282,7 +606,8 @@ sub PrintTournament { img({src=>'/icons/compressed.gif', alt=>'[ZIP]', border=>1})) . " " . img({src=>$imgsrc, alt=>$alt}) . " " . a({href=>url . "?tour=$Tournament{'Id'}&answer=0"}, - $Tournament{'Title'}) . $qnum); + $Tournament{'Title'}. " ". + $Tournament{'PlayedAt'}) . $qnum); } } $output .= dl($list); @@ -337,6 +662,7 @@ sub PrintTour { my ($suffix) = &Suffix($qnum); $output .= h2({align=>"center"}, $Tournament{"Title"}, + $Tournament{'PlayedAt'}, "
", $Tour{"Title"} . " ($qnum вопрос$suffix)\n") . p; @@ -392,7 +718,10 @@ sub PrintTour { sub PrintField { my ($header, $value, $text) = @_; if ($text) { - $value =~ s/<[\/\w]*>//sg; + $value =~ s/<[\/\w]*>//sg; + } else { + $value =~ s/^\s+/
    /mg; + $value =~ s/^\|([^\n]*)/
$1<\/pre>/mg;
 	}
 	return $text ? "$header:\n$value\n\n" : 
 		strong("$header: ") . $value . p . "\n";
@@ -412,7 +741,7 @@ sub PrintQuestion {
 			my (%Tournament) = GetTournament($dbh, $Tour{'ParentId'});
 			$titles .=
 				dd(img({src=>"/icons/folder.open.gif"}) . " " .
-					 a({href=>url . "?tour=$Tournament{'Id'}"}, $Tournament{'Title'}));
+					 a({href=>url . "?tour=$Tournament{'Id'}"}, $Tournament{'Title'}, $Tournament{'PlayedAt'}));
 			$titles .=
 				dl(dd(img({src=>"/icons/folder.open.gif"}) . " " .
 					a({href=>url . "?tour=$Tour{'Id'}"}, $Tour{'Title'})));
@@ -452,27 +781,33 @@ sub GetQNum {
 	$sth->execute;
  	return ($sth->fetchrow)[0];
 }
+sub GetMaxQId {  # Returns the largest QuestionId present in the Questions table.
+	my ($dbh) = @_;  # $dbh: DBI database handle used to run the query
+	my ($sth) = $dbh->prepare("SELECT MAX(QuestionId) FROM Questions");
+	$sth->execute;  # NOTE(review): the results of prepare/execute are not checked here
+ 	return ($sth->fetchrow)[0];  # first column of the first row; presumably undef for an empty table -- TODO confirm
+}
 
 # Returns Id's of 12 random questions
 sub Get12Random {
    my ($dbh, $type, $num) = @_;
 	my ($i, @questions, $q, $t, $sth);
-	my ($qnum) = &GetQNum($dbh);
+	my ($qnum) = &GetMaxQId($dbh);  # upper bound for sampling is the highest id, not the row count
 	my (%chosen);
 	srand;
 	
-	for ($i = 0; $i < $num; $i++) {
-		do {
-			$q = int(rand($qnum));
-			$sth = $dbh->prepare("SELECT Type FROM Questions
+   for ($i = 0; $i < $num; $i++) {  # collect $num distinct question ids
+       do {  # NOTE(review): retries forever if fewer than $num questions match $type
+	   $q = int(rand($qnum + 1));  # draw from 0..$qnum inclusive so the highest id can be picked too
+	   $sth = $dbh->prepare("SELECT Type FROM Questions
 				WHERE QuestionId=$q");
-			$sth->execute;
-			$t = ($sth->fetchrow)[0];
-		} until !$chosen{$q} && $t =~ /$type/;
-		$chosen{$q} = 'y';
-		push @questions, $q;
-	}
-	return @questions;
+	   $sth->execute;
+	   $t = ($sth->fetchrow)[0];  # Type of the drawn question; false when no row has this id
+       } until !$chosen{$q} && $t && $type =~ /[$t]/;  # accept an unseen, existing question whose Type shares a character with $type
+       $chosen{$q} = 'y';  # remember the id so it is not returned twice
+       push @questions, $q;
+   }
+   return @questions;  # list of the selected QuestionIds, in the order drawn
 }
 
 sub Include_virtual {
@@ -524,7 +859,7 @@ sub PrintAll {
 	} else {
 		$output .= dd(img({src=>"/icons/folder.gif", alt=>"[*]"}) .
       " " . a({href=>url . "?tour=$Tournament{'Id'}&answer=0"},
-      $Tournament{'Title'}) . " $New");
+      $Tournament{'Title'}) ." " . $Tournament{'PlayedAt'} . " $New");
 	}
 	if ($Id == 0 or $Tournament{'Type'} =~ /Г/) {
 		for ($i = 0; $i <= $#Tours; $i++) {
@@ -558,7 +893,7 @@ sub PrintDates {
 		%Tournament = &GetTournament($dbh, $array[0]);
       $list .= dd(img({src=>"/icons/folder.gif", alt=>"[*]"}) .
       " " . a({href=>url . "?tour=$Tournament{'Id'}&answer=0"},
-      $Tournament{'Title'}));
+      $Tournament{'Title'}, $Tournament{'PlayedAt'}));
 	}
 	$output .= dl($list);
 	return $output;
@@ -577,7 +912,7 @@ MAIN:
 		    print end_html;
 			die "Can't connect to DB chgk\n";
 		};
-	if (!param('comp') and !$text) {
+	if (!param('comp') and !param('sqldump') and !$text) {
 	   print header;
 	   print start_html(-"title"=>'Database of the questions',
 	           -author=>'dimrub@icomverse.com',
@@ -586,21 +921,33 @@ MAIN:
 		print &Include_virtual("../dimrub/db/reklama.html");
 	}
 
+if ($^O =~ /win/i) {
+	$thislocale = "Russian_Russia.20866";
+} else {
+	$thislocale = "ru_RU.KOI8-R";
+}
+POSIX::setlocale( &POSIX::LC_ALL, $thislocale );
+
+if ((uc 'а') ne 'А') {print "Koi8-r locale not installed!\n"};
+
+
 	if ($text) {
 		print header('text/plain');
 	}
 
 	if (param('rand')) {
-		my ($type, $qnum) = ('Ч', 12);
-		$type = 'Б' if (param('brain'));
+		my ($type, $qnum) = ('', 12);
+		$type .= 'Б' if (param('brain'));
+		$type .= 'Ч' if (param('chgk'));
 		$qnum = param('qnum') if (param('qnum') =~ /^\d+$/);	
+		$qnum = 0 if (!$type);
 		if (param('email') && -x $SENDMAIL && 
 		open(F, "| $SENDMAIL -t -n")) {
 			my ($Email) = param('email');
 			my ($mime_type) = $text ? "plain" : "html";
 			print F < 'application/x-zip-compressed; name="db.zip"',
-			-'Content-Disposition' => 'attachment; filename="db.zip"'
-		);
-		$tour = (param('tour')) ? param('tour') : 0;
-		my (@files) = &PrintArchive($dbh, $tour);
-		open F, "$ZIP -j - $SRCPATH/COPYRIGHT @files |";
-		print ();
-		close F;
-		$dbh->disconnect;
-		exit;
+	    print header(
+			 -'Content-Type' => 'application/x-zip-compressed; name="db.zip"',
+			 -'Content-Disposition' => 'attachment; filename="db.zip"'
+			 );
+	    $tour = (param('tour')) ? param('tour') : 0;
+	    my (@files) = &PrintArchive($dbh, $tour);
+	    open F, "$ZIP -j - $SRCPATH/COPYRIGHT @files |";
+	    print ();
+	    close F;
+	    $dbh->disconnect;
+	    exit;
+	} elsif (param('sqldump')) {
+	    print header(
+			 -'Content-Type' => 'application/x-zip-compressed; name="dump.zip"',
+			 -'Content-Disposition' => 'attachment; filename="dump.zip"'
+			 );
+	    open F, "$ZIP -j - $DUMPFILE |";
+	    print ();
+	    close F;
+	    $dbh->disconnect;
+	    exit;
+
 	} else {
 		$tour = (param('tour')) ? param('tour') : 0;
 		if ($tour !~ /^[0-9]*$/) {