--- db/prgsrc/db.cgi 2000/10/24 18:03:49 1.12 +++ db/prgsrc/db.cgi 2001/11/19 01:31:47 1.30 @@ -2,10 +2,29 @@ use DBI; use CGI ':all'; +use Text::Query; use strict; use Time::Local; use POSIX qw(locale_h); +use locale; +open STDERR, ">errors"; +my $printqueries=1; +my $debug=1; #added by R7 +my %fieldname= (0,'Question', 1, 'Answer', 2, 'Comments', 3, 'Authors', 4, 'Sources'); +my %searchin; + + +my $thislocale; + + +$searchin{'question'}=param('Question'); +$searchin{'answer'}=param('Answer'); +$searchin{'comment'}=param('Comment'); +$searchin{'authors'}=param('Authors'); +$searchin{'sources'}=param('Sources'); +my $all=param('all'); +$all=0 if lc $all eq 'no'; my ($PWD) = `pwd`; chomp $PWD; my ($SRCPATH) = "$PWD/../dimrub/src"; @@ -102,34 +121,297 @@ sub GetTours { return @Tours; } +sub russearch { + my ($dbh, $sstr, $all,$allnf)=@_; + my (@qw,@w,@tasks,$qw,@arr,$nf,$sth,@nf,$w,$where,$e,@where,%good,$i,%where,$from); + my($number,@good,$t,$task,@rho,$rank,%rank,$r2,$r1,$word,$n,@last,$good,@words,%number,$taskid); + my ($hi, $lo, $wordnumber,$query,$blob,$field,$sf,$ii); + my @frequence; + my (@arr1,@ar,@sf,@arr2); + my %tasks; + my $tasks; + my @verybad; + my %nf; + my %tasksof; + my %wordsof; + my %relevance; + my @blob; + my %count; + +$sstr=~tr/йцукенгшщзхъфывапролджэячсмитьбю/ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ/; + @qw=@w =split (' ', uc $sstr); + +#----------- + foreach $i (0..$#w) # заполняем массив @nf начальных форм + # $nf[$i] -- ссылка на массив возможных + # начальных форм словоформы $i + { + $qw= $dbh->quote (uc $w[$i]); + $query=" select distinct w2 from nests + where w1=$qw"; +print "$query",br if $printqueries; + $sth=$dbh -> prepare($query); + $sth -> execute; + @{$nf[$i]}=(); + while (@arr = $sth->fetchrow) + { + push (@{$nf[$i]},$arr[0]) + } + } + + + my @bad=grep {!$nf[$_]} 0..$#w; # @bad -- номера словоформ, + # которых нет в словаре + + if (@bad) #есть неопознанные словоформы + { + require "cw.pl"; + foreach $i(@bad) + { + if 
(@arr=checkword($dbh,$w[$i])) + {push (@{$nf[$i]}, @arr);} + else + {push (@verybad,$i);} + } + } + return () if ($all && @verybad); + + + my $kvo=0; + push @$allnf, @{$_} foreach @nf; +print "nf=@$allnf"; + + foreach $i (0..$#w) #запросы в базу... + { + @arr=@{$nf[$i]} if $nf[$i]; + @arr2=@arr1=@arr; + + + + + $_= " word2question.word=".$_. ' ' foreach @arr; + $_= " nf.id=".$_. ' ' foreach @arr1; + +# $_= " nests.w2=".$_. ' ' foreach @arr2; +# $query="select w1 from nests where". (join ' OR ', @arr2); +#print $query if $printqueries; +# $sth=$dbh -> prepare($query); +# $sth->execute; +# while (@ar=$sth->fetchrow) +# { +# $ar[0]=~s/(.)/&nocase($1)/ge; +# push(@sf,'(?:'.$ar[0].')'); +# } +# $selectshablon=join '|',@sf; + +#print $selectshablon,br if $printqueries; + +# $selectshablon=qr/$selectshablon/i; + + + + + $query="select questions from word2question where". (join ' OR ', @arr); +print "$query\n",br if $printqueries; + + $sth=$dbh -> prepare($query); + $sth->execute; + + @blob=(); + while (@arr=$sth->fetchrow) + { + @blob=(@blob,unpack 'C*',$arr[0]); + } + $query="select number from nf where ".(join ' OR ', @arr1); +print "$query\n",br if $printqueries; + $sth=$dbh -> prepare($query); + $sth->execute; + + while (@arr=$sth->fetchrow) + { + $frequence[$i]+=$arr[0]; + } + + + + if (@blob < 4) + { + $tasksof{$i}=undef; + } else + { + $kvo++; + $ii=0; + while ($ii<$#blob) # создаём хэш %tasksof, ключи которого -- + # номера искомых словоформ, а значения -- + # списки вопросов, в которых есть соответствующа + # словоформа. + # Каждый список в свою очередь также оформлен в + # виде хэша, ключи которого -- номера вопросов, + # а значения -- списки номеров вхождений. Вот. + { + ($field,$lo,$hi,$wordnumber)=@blob[$ii..($ii+3)]; + $ii+=4; + $number=$lo+$hi*256; + $field=$fieldname{$field}; + if ($searchin{lc $field}) + { + push @{$tasksof{$i}{$number}}, $wordnumber; + # дополнили в хэше, висящем на + # словоформе $i в %tasksof список + # вхождений $i в вопрос $number. 
+ push @{$wordsof{$number}{$i}}, $wordnumber; + # дополнили в хэше, висящем на + # вопросе $number в %wordsof список + # вхождений $i в вопрос $number. + + + } + } #while ($ii<$#blob) + } + } #foreach $i + +#print "keys tasksof", join ' ', keys %{$tasksof{0}}; +#Ищем пересечение или объединение списков вопросов (значений %tasksof) + foreach $sf (keys %tasksof) + { + $count{$_}++ foreach keys %{$tasksof{$sf}}; + } + @tasks= ($all ? (grep {$count{$_}==$kvo} keys %count) : + keys %count) ; + + +print "\n\$#tasks=",$#tasks,br if $printqueries; +############ Сортировка найденных вопросов + +foreach (keys %wordsof) +{ + $relevance{$_}=&relevance($#w,$wordsof{$_},\@frequence) if $_ +} + +@tasks=sort {$relevance{$b}<=>$relevance{$a}} @tasks; + + +############ + +print "tasks=@tasks"; + +#print "$_ $relevance{$_} | " foreach @tasks; +#print br; +print "allnf=@$allnf",br if $printqueries; + return @tasks; +} + + +sub distance { + # на входе -- номера словоформ и ссылки на + # списки вхождений. На выходе -- расстояние, + # вычисляемое по формуле min(|b-a-pb+pa|) + # pb,pa + # (pb и pa -- позиции слов b и a) + my ($a,$b,$lista,$listb)=@_; + my ($pa,$pb,$min,$curmin); + $min=10000; + foreach $pa (@$lista) + { + foreach $pb (@$listb) + { + $curmin=abs($b-$a-$pb+$pa); + $min= $curmin if $curmin<$min; + } + } + return $min; + +} + +sub relevance { + # На входе -- количество искомых словоформ -1 и + # ссылка на hash, ключи которого -- + # номера словоформ, а значения -- списки вхождений + + my ($n,$words,$frequence)=@_; + my $relevance=0; + my ($first,$second,$d); + foreach $first (0..$n) + { + $relevance+=scalar @{$$words{$first}}+1000+1000/$$frequence[$first] +if $$words{$first}; + foreach $second ($first+1..$n) + { + $d=&distance($first,$second,$$words{$first},$$words{$second}); + $relevance+=($d>10?0:10-$d)*10; + } + } + return $relevance; +} + + # Returns list of QuestionId's, that have the search string in them. 
sub Search { - my ($dbh, $sstr) = @_; + my ($dbh, $sstr,$metod,$all,$allnf) = @_; my (@arr, @Questions, @fields); - my (@sar, $i, $sth); + my (@sar, $i, $sth,$where); # push @fields, 'Question'; - foreach (qw/Question Answer Sources Authors Comments/) { + + if ($metod eq 'rus') + { + my @tasks=russearch($dbh,$sstr,$all,$allnf); + return @tasks + } + + +###Simple and advanced query processing. Added by R7 + if ($metod eq 'simple' || $metod eq 'advanced') + { + foreach (qw/Question Answer Sources Authors Comments/) { if (param($_)) { - push @fields, "IFNULL($_, '')"; + push @fields, $_; } - } + } - @sar = split " ", $sstr; - for $i (0 .. $#sar) { + @fields=(qw/Question Answer Sources Authors Comments/) unless scalar @fields; + my $fields=join ",", @fields; + my $q=new Text::Query($sstr, + -parse => 'Text::Query::'. + (($metod eq 'simple') ? 'ParseSimple':'ParseAdvanced'), + -solve => 'Text::Query::SolveSQL', + -build => 'Text::Query::BuildSQLMySQL', + -fields_searched => $fields); + + $where= $$q{'matchexp'}; + my $query= "SELECT Questionid FROM Questions + WHERE $where"; + print br."Query is: $query".br if $debug; + + $sth = $dbh->prepare($query); + } else +###### + { + + foreach (qw/Question Answer Sources Authors Comments/) { + if (param($_)) { + push @fields, "IFNULL($_, '')"; + } + } + @sar = split " ", $sstr; + for $i (0 .. $#sar) { $sar[$i] = $dbh->quote("%${sar[$i]}%"); - } + } - my($f) = "CONCAT(" . join(',', @fields) . ")"; - if (param('all') eq 'yes') { + my($f) = "CONCAT(" . join(',', @fields) . 
")"; + if (param('all') eq 'yes') { $sstr = join " AND $f LIKE ", @sar; - } else { + } else { $sstr = join " OR $f LIKE ", @sar; - } - - $sth = $dbh->prepare("SELECT QuestionId FROM Questions - WHERE $f LIKE $sstr ORDER BY QuestionId"); + } + +my $query="SELECT QuestionId FROM Questions + WHERE $f LIKE $sstr ORDER BY QuestionId"; +print $query if $printqueries; + $sth = $dbh->prepare($query) + + } #else -- processing old-style query (R7) $sth->execute; while (@arr = $sth->fetchrow) { @@ -155,10 +437,36 @@ sub NoCase { } sub PrintSearch { - my ($dbh, $sstr) = @_; - my (@Questions) = &Search($dbh, $sstr); + my ($dbh, $sstr, $metod) = @_; + my @allnf; + my (@Questions) = &Search($dbh, $sstr,$metod,$all,\@allnf); my ($output, $i, $suffix, $hits) = ('', 0, '', $#Questions + 1); + my $shablon; + + if ($metod eq 'rus') + { + my $where='0'; + $where.= " or w2=$_ " foreach @allnf; + my $query="select w1 from nests where $where"; + my $sth=$dbh->prepare($query); +print "$query" if $printqueries; + + $sth->execute; + my @shablon; + while (my @arr = $sth->fetchrow) + { + push @shablon,"(?:$arr[0])"; + } + $shablon= join "|", @shablon; + $shablon=~s/[её]/\[ЕЁ\]/gi; +# $shablon=~s/([йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ])/&NoCase($1)/ge; + $shablon=qr/$shablon/i; + print "!$shablon!",br if $printqueries; + } + + + if ($hits =~ /1.$/ || $hits =~ /[5-90]$/) { $suffix = 'й'; } elsif ($hits =~ /1$/) { @@ -176,12 +484,16 @@ sub PrintSearch { $sstr =~ s/(.)/&NoCase($1)/ge; - my(@sar) = split(/\s/, $sstr); + my(@sar) = split(' ', $sstr); for ($i = 0; $i <= $#Questions; $i++) { $output = &PrintQuestion($dbh, $Questions[$i], 1, $i + 1, 1); + if (param('metod') eq 'rus') + { + $output=~s/\b($shablon)\b/\$1\<\/strong\>/gi; + } else { foreach (@sar) { $output =~ s/$_/$&<\/strong>/gs; - } + }} print $output; } } @@ -407,6 +719,7 @@ sub PrintField { $value =~ s/^\s+/
    /mg; $value =~ s/^\|([^\n]*)/
$1<\/pre>/mg;
 	}
+
 	return $text ? "$header:\n$value\n\n" : 
 		strong("$header: ") . $value . p . "\n";
 }
@@ -444,7 +757,32 @@ sub PrintQuestion {
 			&PrintField("Ответ", $Question{'Answer'}, $text);
 
 		if ($Question{'Authors'}) {
-			$output .= &PrintField("Автор(ы)", $Question{'Authors'}, $text);
+                      my $q=$Question{'Authors'};
+
+		      my $sth=$dbh->prepare("select Authors.Id,Name, Surname, Nicks from Authors, A2Q
+                                 where Authors.Id=Author And Question=$Id");
+                      $sth->execute;
+                      my ($AuthorId,$Name, $Surname,$other,$Nicks);
+
+                      while ((($AuthorId,$Name, $Surname,$Nicks)=$sth->fetchrow),$AuthorId)
+                      {
+                        my ($firstletter)=$Name=~m/^./g;
+#		        $other.=a({href=>url."?qofauthor=$AuthorId"},"$Name $Surname").". ";
+                          my $sha="(?:$Name\\s+$Surname)|(?:$Surname\\s+$Name)|(?:$firstletter\\.\\s*$Surname)|(?:$Surname\\s+$firstletter\\.)|(?:$Surname)|(?:$Name)";
+                          $Nicks=~s/^\|//;
+                          foreach (split /\|/, $Nicks)
+                          {
+                              s/ /\\s+/;
+                              if (s/>$//) {$sha="$sha|(?:$_)"}
+                              else        {$sha="(?:$_)|$sha"}
+                          }
+#$output.=br."sha=$sha".br;
+                          $q=~s/($sha)/a({href=>url."?qofauthor=$AuthorId"},$1)/ei;
+                      }
+
+			$output .= &PrintField("Автор(ы)", $q, $text);
+
+#                        $output.= &PrintField("Другие вопросы", $other);
 		}
 
 		if ($Question{'Sources'}) {
@@ -583,6 +921,87 @@ sub PrintDates {
 	return $output;
 }
 
+# Print the name of author $id followed by every question credited to them.
+# $dbh: open DBI handle.  $id: Authors.Id value, untrusted request input.
+sub PrintQOfAuthor
+{
+    my ($dbh, $id) = @_;
+    $id = $dbh->quote($id);    # quoted once, spliced into both queries below
+
+    my $sth = $dbh->prepare("SELECT  Name, Surname FROM Authors WHERE Id=$id");
+    $sth->execute;
+    my ($name, $surname) = $sth->fetchrow;
+
+    $sth = $dbh->prepare("SELECT Question FROM A2Q WHERE Author=$id");
+    $sth->execute;
+    my @Questions;
+    # Loop on "a row was returned", not on the value, so a false id such as 0
+    # no longer cuts the list short; NULL ids are skipped instead.
+    while (my ($question) = $sth->fetchrow) {
+        push @Questions, $question if defined $question;
+    }
+
+    # Choose the Russian noun ending that agrees with the number of hits.
+    my $hits   = scalar @Questions;
+    my $suffix = ($hits =~ /1.$/ || $hits =~ /[5-90]$/) ? 'й'
+               : ($hits =~ /1$/)                        ? 'е'
+               :                                          'я';
+
+    print p({align=>"center"}, "Автор ".strong("$name $surname. ")
+        . " : $hits попадани$suffix.");
+
+    for my $i (0 .. $#Questions) {
+        my $output = &PrintQuestion($dbh, $Questions[$i], 1, $i + 1, 1);
+        print $output;
+    }
+}
+
+
+sub PrintAuthors
+{
+     my ($dbh,$sort)=@_;
+     my($output,$out1,@array,$sth);
+     if ($sort eq 'surname') 
+     {
+        $sth = 
+             $dbh->prepare("SELECT Id, Name, Surname, QNumber FROM Authors order by Surname");
+        
+        $output.="";
+     }
+     else
+     {
+        $sth = 
+             $dbh->prepare("SELECT Id, Name, Surname, QNumber FROM Authors Order by QNumber DESC");      
+        $output.="
Алфавитный список авторов
"; + } + + $sth->execute; + $output.=Tr(th["Фамилия, имя", "Количество вопросов"]); + + $out1=''; + + my $ar=$sth->fetchall_arrayref; + + + + foreach my $arr(@$ar) + { + + my ($id,$name,$surname,$kvo)=@$arr; + if (!$name || !$surname) {#print "Opanki at $id\n" + } else + { + my $add=Tr(td([a({href=>"/cgi-bin/db.cgi?qofauthor=$id"},'[Q] ')."$name $surname", $kvo]))."\n"; + print STDERR $add; + $output.=$add; + } + } + $output.="
"; + return $output; +} + + + MAIN: { setlocale(LC_CTYPE,'russian'); @@ -605,6 +1024,16 @@ MAIN: print &Include_virtual("../dimrub/db/reklama.html"); } +if ($^O =~ /win/i) { + $thislocale = "Russian_Russia.20866"; +} else { + $thislocale = "ru_RU.KOI8-R"; +} +POSIX::setlocale( &POSIX::LC_ALL, $thislocale ); + +if ((uc 'а') ne 'А') {print "Koi8-r locale not installed!\n"}; + + if ($text) { print header('text/plain'); } @@ -634,8 +1063,15 @@ EOT } else { print &PrintRandom($dbh, $type, $qnum, $text); } - } elsif (param('sstr')) { - &PrintSearch($dbh, param('sstr')); + } + elsif (param('authors')){ + print &PrintAuthors($dbh,param('authors')); + } + elsif (param('qofauthor')){ + &PrintQOfAuthor($dbh,param('qofauthor')); + } + elsif (param('sstr')) { + &PrintSearch($dbh, param('sstr'), param('metod')); } elsif (param('all')) { print &PrintAll($dbh, 0); } elsif (param('from_year') && param('to_year')) {