version 1.20, 2001/05/17 23:31:08
|
version 1.25, 2001/10/22 20:12:29
|
Line 1
|
Line 1
|
#!/usr/bin/perl -w |
#!/usr/local/bin/perl -w |
|
|
use DBI; |
use DBI; |
use CGI ':all'; |
use CGI ':all'; |
Line 6 use Text::Query;
|
Line 6 use Text::Query;
|
use strict; |
use strict; |
use Time::Local; |
use Time::Local; |
use POSIX qw(locale_h); |
use POSIX qw(locale_h); |
my $debug=1; #added by R7 |
use locale; |
open STDERR, ">/tmp/errors" if $debug; |
open STDERR, ">errors"; |
|
my $printqueries=0; |
|
my %fieldname= (0,'Question', 1, 'Answer', 2, 'Comments', 3, 'Authors', 4, 'Sources'); |
|
my %searchin; |
|
|
|
|
|
|
|
my $thislocale; |
|
|
|
|
|
$searchin{'question'}=param('Question'); |
|
$searchin{'answer'}=param('Answer'); |
|
$searchin{'comment'}=param('Comment'); |
|
$searchin{'authors'}=param('Authors'); |
|
$searchin{'sources'}=param('Sources'); |
|
$printqueries||=param('debug'); |
|
my $all=param('all'); |
|
$all=0 if lc $all eq 'no'; |
my ($PWD) = `pwd`; |
my ($PWD) = `pwd`; |
chomp $PWD; |
chomp $PWD; |
my ($SRCPATH) = "$PWD/../dimrub/src"; |
my ($SRCPATH) = "$PWD/../dimrub/src"; |
Line 104 sub GetTours {
|
Line 121 sub GetTours {
|
return @Tours; |
return @Tours; |
} |
} |
|
|
|
sub russearch {
    # Morphology-aware search: finds questions containing any grammatical
    # form of the words in the search string and ranks them by relevance.
    #
    # Arguments:
    #   $dbh   - database handle (quote/prepare are called on it)
    #   $sstr  - search string; split on whitespace into word forms
    #   $all   - true: a question must contain every searched word
    #            false: any searched word is enough
    #   $allnf - array reference used as an output parameter; every base
    #            form found for every word is appended to it
    #
    # Returns the list of matching question numbers, most relevant first.
    # Returns an empty list when $all is set and some word is unknown.
    #
    # Reads the file-level %fieldname, %searchin and $printqueries.
    my ($dbh, $sstr, $all, $allnf) = @_;

    my @nf;          # $nf[$i]: ref to the list of base forms of word form $i
    my @frequence;   # $frequence[$i]: summed nf.number over those base forms
    my @verybad;     # indices of word forms nothing could be found for
    my %tasksof;     # word index => { question number => [word positions] }
    my %wordsof;     # question number => { word index => [word positions] }
    my %count;       # question number => how many searched words it contains
    my %relevance;   # question number => relevance score

    # Upper-case the Cyrillic letters explicitly before uc() is applied.
    $sstr=~tr/йцукенгшщзхъфывапролджэячсмитьбю/ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ/;
    my @w = split(' ', uc $sstr);

    # Fill @nf with the possible base forms of every searched word form.
    foreach my $i (0 .. $#w) {
        my $quoted = $dbh->quote(uc $w[$i]);
        my $query  = " select distinct w2 from nests\n where w1=$quoted";
        print "$query",br if $printqueries;
        my $sth = $dbh->prepare($query);
        $sth->execute;
        $nf[$i] = [];
        while (my @row = $sth->fetchrow) {
            push @{ $nf[$i] }, $row[0];
        }
    }

    # @bad: indices of word forms that are not in the dictionary.
    # Every $nf[$i] is an array reference at this point, so the emptiness
    # of the list is what has to be tested, not the truth of the reference.
    my @bad = grep { !@{ $nf[$_] } } 0 .. $#w;

    if (@bad) {
        # There are unrecognised word forms: try the checkword() fallback.
        require "cw.pl";
        foreach my $i (@bad) {
            if (my @guessed = checkword($dbh, $w[$i])) {
                push @{ $nf[$i] }, @guessed;
            }
            else {
                push @verybad, $i;
            }
        }
    }
    return () if ($all && @verybad);

    my $kvo = 0;     # number of searched words that have index data at all
    push @$allnf, @{$_} foreach @nf;

    # Query the index for every searched word.
    foreach my $i (0 .. $#w) {
        my @forms = @{ $nf[$i] };

        # Without base forms the WHERE clause would be empty (invalid SQL);
        # such a word simply has no occurrences.
        unless (@forms) {
            $tasksof{$i} = undef;
            next;
        }

        my $word_cond = join ' OR ', map { " word2question.word=" . $_ . ' ' } @forms;
        my $nf_cond   = join ' OR ', map { " nf.id=" . $_ . ' ' } @forms;

        my $query = "select questions from word2question where" . $word_cond;
        print "$query\n",br if $printqueries;
        my $sth = $dbh->prepare($query);
        $sth->execute;

        # Each fetched value is a packed byte string; collect all the bytes.
        my @blob;
        while (my @row = $sth->fetchrow) {
            push @blob, unpack 'C*', $row[0];
        }

        $query = "select number from nf where " . $nf_cond;
        print "$query\n",br if $printqueries;
        $sth = $dbh->prepare($query);
        $sth->execute;
        while (my @row = $sth->fetchrow) {
            $frequence[$i] += $row[0];
        }

        if (@blob < 4) {
            $tasksof{$i} = undef;
            next;
        }

        $kvo++;

        # Build %tasksof and %wordsof from the 4-byte records in @blob:
        # (field code, low byte, high byte of the question number,
        # position of the word).
        my $ii = 0;
        while ($ii < $#blob) {
            my ($field, $lo, $hi, $wordnumber) = @blob[$ii .. ($ii + 3)];
            $ii += 4;
            my $number = $lo + $hi * 256;

            my $field_name = $fieldname{$field};
            next unless defined $field_name;    # unknown field code

            my $key    = lc $field_name;
            my $wanted = $searchin{$key};
            # %fieldname says 'Comments' while the flag for that field is
            # stored under 'comment'; accept either spelling so that the
            # comments field can match.
            $wanted = $searchin{'comment'} if !$wanted && $key eq 'comments';
            next unless $wanted;

            # Record the occurrence of word $i in question $number, both
            # per word and per question.
            push @{ $tasksof{$i}{$number} }, $wordnumber;
            push @{ $wordsof{$number}{$i} }, $wordnumber;
        }
    }

    print "keys tasksof", keys %tasksof if $printqueries;

    # Intersection (when $all) or union of the per-word question lists.
    foreach my $sf (keys %tasksof) {
        my $questions = $tasksof{$sf} or next;
        $count{$_}++ foreach keys %$questions;
    }
    my @tasks = $all
        ? (grep { $count{$_} == $kvo } keys %count)
        : keys %count;

    # Sort the found questions by relevance, best first.
    foreach my $question (keys %wordsof) {
        $relevance{$question} = relevance($#w, $wordsof{$question}, \@frequence)
            if $question;
    }
    @tasks = sort { $relevance{$b} <=> $relevance{$a} } @tasks;

    print "tasks=@tasks" if $printqueries;
    print "allnf=@$allnf",br if $printqueries;
    return @tasks;
}
|
|
|
|
|
sub distance {
    # Input: the indices of two word forms in the query and references to
    # the lists of positions at which each of them occurs.
    # Output: min over all position pairs (pa, pb) of |b - a - pb + pa|,
    # i.e. how far the closest pair of occurrences deviates from the
    # spacing the two words have in the query.
    # Returns 10000 when either list has no positions.
    my ($index_a, $index_b, $lista, $listb) = @_;

    my $best = 10000;
    for my $pos_a (@$lista) {
        for my $pos_b (@$listb) {
            my $gap = abs($index_b - $index_a - $pos_b + $pos_a);
            next unless $gap < $best;
            $best = $gap;
        }
    }

    return $best;
}
|
|
|
sub relevance {
    # Computes the relevance score of one question for the current query.
    #
    # Arguments:
    #   $n         - number of searched word forms minus one (last index)
    #   $words     - hash reference: word-form index => reference to the
    #                list of positions where that word occurs
    #   $frequence - array reference of per-word frequency figures, indexed
    #                like $words
    #
    # Returns a number; larger means more relevant.
    my ($n, $words, $frequence) = @_;
    my $relevance = 0;

    foreach my $first (0 .. $n) {
        my $hits = $$words{$first};
        if ($hits) {
            # One point per occurrence plus a flat 1000 for the word
            # being present at all.
            my $score = scalar(@$hits) + 1000;

            # Bonus inversely proportional to the word's frequency.  A
            # missing or zero frequency would make the division fatal,
            # so such a word simply gets no bonus.
            my $freq = $$frequence[$first];
            $score += 1000 / $freq if defined $freq && $freq != 0;

            $relevance += $score;
        }

        # Proximity bonus for every pair of searched words: up to 100
        # points when the pair appears with the same spacing as in the
        # query, nothing once the deviation exceeds 10.
        foreach my $second ($first + 1 .. $n) {
            my $d = distance($first, $second, $$words{$first}, $$words{$second});
            $relevance += ($d > 10 ? 0 : 10 - $d) * 10;
        }
    }

    return $relevance;
}
|
|
|
|
|
|
# Returns list of QuestionId's, that have the search string in them. |
# Returns list of QuestionId's, that have the search string in them. |
sub Search { |
sub Search { |
my ($dbh, $sstr, $metod) = @_; |
my ($dbh, $sstr,$metod,$all,$allnf) = @_; |
my (@arr, @Questions, @fields); |
my (@arr, @Questions, @fields); |
my (@sar, $i, $sth,$where); |
my (@sar, $i, $sth,$where); |
my $btime=time; |
|
|
|
# push @fields, 'Question'; |
# push @fields, 'Question'; |
|
|
|
if ($metod eq 'rus') |
|
{ |
|
my @tasks=russearch($dbh,$sstr,$all,$allnf); |
|
return @tasks |
|
} |
|
|
|
|
###Simple and advanced query processing. Added by R7 |
###Simple and advanced query processing. Added by R7 |
if ($metod eq 'simple' || $metod eq 'advanced') |
if ($metod eq 'simple' || $metod eq 'advanced') |
{ |
{ |
Line 123 sub Search {
|
Line 369 sub Search {
|
} |
} |
} |
} |
|
|
|
|
@fields=(qw/Question Answer Sources Authors Comments/) unless scalar @fields; |
@fields=(qw/Question Answer Sources Authors Comments/) unless scalar @fields; |
my $fields=join ",", @fields; |
my $fields=join ",", @fields; |
my $q=new Text::Query($sstr, |
my $q=new Text::Query($sstr, |
Line 135 sub Search {
|
Line 382 sub Search {
|
$where= $$q{'matchexp'}; |
$where= $$q{'matchexp'}; |
my $query= "SELECT Questionid FROM Questions |
my $query= "SELECT Questionid FROM Questions |
WHERE $where"; |
WHERE $where"; |
print br."Query is: $query".br if $debug; |
print br."Query is: $query".br if $printqueries; |
|
|
$sth = $dbh->prepare($query); |
$sth = $dbh->prepare($query); |
} else |
} else |
Line 160 sub Search {
|
Line 407 sub Search {
|
} else { |
} else { |
$sstr = join " OR $f LIKE ", @sar; |
$sstr = join " OR $f LIKE ", @sar; |
} |
} |
|
|
$sth = $dbh->prepare("SELECT QuestionId FROM Questions |
my $query="SELECT QuestionId FROM Questions |
WHERE $f LIKE $sstr ORDER BY QuestionId"); |
WHERE $f LIKE $sstr ORDER BY QuestionId"; |
|
print $query if $printqueries; |
|
$sth = $dbh->prepare($query) |
|
|
} #else -- processing old-style query (R7) |
} #else -- processing old-style query (R7) |
|
|
Line 170 sub Search {
|
Line 419 sub Search {
|
while (@arr = $sth->fetchrow) { |
while (@arr = $sth->fetchrow) { |
push @Questions, $arr[0]; |
push @Questions, $arr[0]; |
} |
} |
print br, "Search time: ",time-$btime," sec",br if $debug; |
|
return @Questions; |
return @Questions; |
} |
} |
|
|
Line 191 sub NoCase {
|
Line 440 sub NoCase {
|
|
|
sub PrintSearch { |
sub PrintSearch { |
my ($dbh, $sstr, $metod) = @_; |
my ($dbh, $sstr, $metod) = @_; |
my (@Questions) = &Search($dbh, $sstr,$metod); |
my @allnf; |
|
my (@Questions) = &Search($dbh, $sstr,$metod,$all,\@allnf); |
my ($output, $i, $suffix, $hits) = ('', 0, '', $#Questions + 1); |
my ($output, $i, $suffix, $hits) = ('', 0, '', $#Questions + 1); |
|
|
|
my $shablon; |
|
|
|
|
|
if ($metod eq 'rus') |
|
{ |
|
my $where='0'; |
|
$where.= " or w2=$_ " foreach @allnf; |
|
my $query="select w1 from nests where $where"; |
|
my $sth=$dbh->prepare($query); |
|
print "$query" if $printqueries; |
|
|
|
$sth->execute; |
|
my @shablon; |
|
while (my @arr = $sth->fetchrow) |
|
{ |
|
push @shablon,"(?:$arr[0])"; |
|
} |
|
$shablon= join "|", @shablon; |
|
$shablon=~s/[её]/\[ЕЁ\]/gi; |
|
# $shablon=~s/([йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ])/&NoCase($1)/ge; |
|
$shablon=qr/$shablon/i; |
|
print "!$shablon!",br if $printqueries; |
|
} |
|
|
|
|
|
|
if ($hits =~ /1.$/ || $hits =~ /[5-90]$/) { |
if ($hits =~ /1.$/ || $hits =~ /[5-90]$/) { |
$suffix = 'й'; |
$suffix = 'й'; |
} elsif ($hits =~ /1$/) { |
} elsif ($hits =~ /1$/) { |
Line 214 sub PrintSearch {
|
Line 490 sub PrintSearch {
|
my(@sar) = split(' ', $sstr); |
my(@sar) = split(' ', $sstr); |
for ($i = 0; $i <= $#Questions; $i++) { |
for ($i = 0; $i <= $#Questions; $i++) { |
$output = &PrintQuestion($dbh, $Questions[$i], 1, $i + 1, 1); |
$output = &PrintQuestion($dbh, $Questions[$i], 1, $i + 1, 1); |
|
if (param('metod') eq 'rus') |
|
{ |
|
$output=~s/\b($shablon)\b/\<strong\>$1\<\/strong\>/gi; |
|
} else { |
foreach (@sar) { |
foreach (@sar) { |
$output =~ s/$_/<strong>$&<\/strong>/gs; |
$output =~ s/$_/<strong>$&<\/strong>/gs; |
} |
}} |
print $output; |
print $output; |
} |
} |
} |
} |
Line 640 MAIN:
|
Line 920 MAIN:
|
print &Include_virtual("../dimrub/db/reklama.html"); |
print &Include_virtual("../dimrub/db/reklama.html"); |
} |
} |
|
|
|
if ($^O =~ /win/i) { |
|
$thislocale = "Russian_Russia.20866"; |
|
} else { |
|
$thislocale = "ru_RU.KOI8-R"; |
|
} |
|
POSIX::setlocale( &POSIX::LC_ALL, $thislocale ); |
|
|
|
if ((uc 'а') ne 'А') {print "Koi8-r locale not installed!\n"}; |
|
|
|
|
if ($text) { |
if ($text) { |
print header('text/plain'); |
print header('text/plain'); |
} |
} |