Класс Find
Объект $Find:
Объект для поиска по страницам
Исходный код
class Find extends \Cms\Site\Page { … }
Свойства
$minWordLength
$Find->minWordLength = 3;
Исходный код
// Minimum word length. Compared against mb_strlen() of query words in request()
// and of stems in the string branch of getStem(); stemWord() compares it against
// the byte length (strlen) of the word.
var $minWordLength = 3;
$VOWEL
$Find->VOWEL = '/аеиоуыэюя/u';
Исходный код
// Vowel pattern. Not referenced by any method visible in this section.
// NOTE(review): as written it matches the literal sequence "аеиоуыэюя", not a
// single vowel; a character class [аеиоуыэюя] was presumably intended - confirm.
var $VOWEL = '/аеиоуыэюя/u';
$PERFECTIVEGROUND
$Find->PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$/u';
Исходный код
// Suffix pattern tried first in step 1 of stemWord(); when it matches and is
// removed, the other step 1 patterns are skipped.
var $PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$/u';
$REFLEXIVE
$Find->REFLEXIVE = '/(с[яь])$/u';
Исходный код
// Trailing "ся"/"сь"; removed in step 1 of stemWord() when PERFECTIVEGROUND did not match.
var $REFLEXIVE = '/(с[яь])$/u';
$ADJECTIVE
$Find->ADJECTIVE = '/(ее|ие|ые|ое|ими|ыми|ей|ий|ый|ой|ем|им|ым|ом|его|ого|еых|ую|юю|ая|яя|ою|ею)$/u';
Исходный код
// Suffix pattern removed in step 1 of stemWord() after REFLEXIVE; when it matches,
// PARTICIPLE is tried next, otherwise VERB and NOUN are tried.
// NOTE(review): the alternative "еых" looks like a typo - the Snowball Russian
// stemmer lists "их" and "ых" (and "ему", "ому") among adjectival endings.
// Confirm before changing: stems already stored in the index would change.
var $ADJECTIVE = '/(ее|ие|ые|ое|ими|ыми|ей|ий|ый|ой|ем|им|ым|ом|его|ого|еых|ую|юю|ая|яя|ою|ею)$/u';
$PARTICIPLE
$Find->PARTICIPLE = '/((ивш|ывш|ующ)|((?<=[ая])(ем|нн|вш|ющ|щ)))$/u';
Исходный код
// Suffix pattern removed in step 1 of stemWord() only after ADJECTIVE has matched.
var $PARTICIPLE = '/((ивш|ывш|ующ)|((?<=[ая])(ем|нн|вш|ющ|щ)))$/u';
$VERB
$Find->VERB = '/((ила|ыла|ена|ейте|уйте|ите|или|ыли|ей|уй|ил|ыл|им|ым|ены|ить|ыть|ишь|ую|ю)|((?<=[ая])(ла|на|ете|йте|ли|й|л|ем|н|ло|но|ет|ют|ны|ть|ешь|нно)))$/u';
Исходный код
// Suffix pattern tried in step 1 of stemWord() when ADJECTIVE did not match.
var $VERB = '/((ила|ыла|ена|ейте|уйте|ите|или|ыли|ей|уй|ил|ыл|им|ым|ены|ить|ыть|ишь|ую|ю)|((?<=[ая])(ла|на|ете|йте|ли|й|л|ем|н|ло|но|ет|ют|ны|ть|ешь|нно)))$/u';
$NOUN
$Find->NOUN = '/(а|ев|ов|ие|ье|е|иями|ями|ами|еи|ии|и|ией|ей|ой|ий|й|и|ы|ь|ию|ью|ю|ия|ья|я)$/u';
Исходный код
// Suffix pattern tried in step 1 of stemWord() when neither ADJECTIVE nor VERB matched.
var $NOUN = '/(а|ев|ов|ие|ье|е|иями|ями|ами|еи|ии|и|ией|ей|ой|ий|й|и|ы|ь|ию|ью|ю|ия|ья|я)$/u';
$RVRE
$Find->RVRE = '/^(.*?[аеиоуыэюя])(.*)$/u';
Исходный код
// Splits a word in stemWord(): group 1 is the shortest prefix ending with a vowel,
// group 2 is the remainder, which is the only part the stemming steps modify.
var $RVRE = '/^(.*?[аеиоуыэюя])(.*)$/u';
$DERIVATIONAL
$Find->DERIVATIONAL = '/[^аеиоуыэюя][аеиоуыэюя]+[^аеиоуыэюя]+[аеиоуыэюя].*(?<=о)сть?$/u';
Исходный код
// Condition checked in step 3 of stemWord(): only when the remainder matches this
// pattern is a trailing "ост"/"ость" removed.
var $DERIVATIONAL = '/[^аеиоуыэюя][аеиоуыэюя]+[^аеиоуыэюя]+[аеиоуыэюя].*(?<=о)сть?$/u';
Методы
register()
$Find->register(&$Main);
Исходный код
/**
 * Registers the search page type.
 *
 * Adds a default "search" entry (type "find", title "Поиск") through
 * $Main->addDefault() and returns the descriptor of this page type.
 *
 * @param object $Main Object exposing addDefault(); received by reference.
 * @return array Descriptor with a single "title" key.
 */
function register(&$Main) {
    $title = "Поиск";
    $defaults = array(
        "type" => "find",
        "title" => $title,
    );
    $Main->addDefault("search", $defaults);
    return array("title" => $title);
}
reindex()
$Find->reindex();
Исходный код
/**
 * Loads and re-saves every row of every storage table.
 *
 * For each table listed in $Main->Storage()->tables all rows are selected,
 * each row is turned into an item with $Main->load(), opened with openAll()
 * and written back with saveFull(). When saveFull() fails the item's error
 * text is printed and processing continues with the next row.
 * Presumably saveFull() is what rebuilds the search data - confirm.
 */
function reindex() {
    $Data = $this->Data();
    $Main = $this->Main();
    foreach ($Main->Storage()->tables as $table => $fields) {
        $result = $Data->query("SELECT * FROM `$table`");
        if (!$result) {
            // Query failed or returned nothing usable: go to the next table
            continue;
        }
        while ($row = $Data->fetch($result)) {
            $Item = $Main->load($row);
            $Item->openAll();
            if ($Item->saveFull()) {
                continue;
            }
            print $Item->error;
        }
    }
}
request()
$Find->request($s=null);
Исходный код
/**
 * Parses a search query and stores its derived forms on the object.
 *
 * On success the following properties are set: search (trimmed query),
 * qsearch (query quoted with Data()->quote()), words (query words),
 * pwords (regex alternation of the words that are at least minWordLength
 * characters long, escaped for the "~" delimiter), stem (list of stems),
 * qstem (comma-separated quoted stems) and rstem (stem => stem lookup map).
 * On failure $this->error holds a user-facing message.
 *
 * @param string|null $s Query text; when null it is taken from $_GET['s'].
 * @return bool True when at least one stem was extracted, false otherwise.
 */
function request($s = null) {
    if (!isset($s)) {
        // A missing parameter or a non-scalar one (e.g. ?s[]=x) counts as an
        // empty query instead of raising a warning or searching for "Array"
        $s = (isset($_GET['s']) && is_scalar($_GET['s'])) ? strval($_GET['s']) : "";
    }
    $search = trim($s);
    if (!strlen($search)) {
        $this->error = "Пожалуйста, введите строку для поиска";
        return false;
    }
    $words = $this->getWords($search);
    $stem = $words ? $this->getStem($words) : array();
    if (!$stem) {
        $this->error = "Пожалуйста, уточните Ваш поисковый запрос";
        return false;
    }
    $Data = $this->Data();
    $pwords = array();
    foreach ($words as $word) {
        if (mb_strlen($word) >= $this->minWordLength) {
            $pwords[] = preg_quote($word, "~");
        }
    }
    $qstem = array();
    $rstem = array();
    foreach ($stem as $st) {
        $qstem[] = $Data->quote($st);
        $rstem[$st] = $st;
    }
    $this->qsearch = $Data->quote($search);
    $this->search = $search;
    $this->pwords = implode("|", $pwords);
    $this->words = $words;
    $this->qstem = implode(",", $qstem);
    $this->rstem = $rstem;
    $this->stem = $stem;
    return true;
}
getIndex()
$Find->getIndex();
Исходный код
/**
 * Returns the SQL expression used as the relevance value (`index`) of a row.
 *
 * When the "find-full-stem" configuration value (default true) is truthy the
 * expression is a MATCH ... AGAINST over `full`.`full` with the current stems
 * joined by spaces; otherwise it is an aggregate over the `stat` columns.
 *
 * @return string SQL expression.
 */
function getIndex() {
    $useFullText = $this->Conf()->get("find-full-stem", true);
    if ($useFullText) {
        $against = $this->Data()->quote(implode(" ", $this->stem));
        return "MATCH (`full`.`full`) AGAINST (" . $against . ")";
    }
    return "SUM(`stat`.`c`) / MAX(`stat`.`c`/`stat`.`p`) + AVG(`stat`.`d`) * 0.1 + AVG(`stat`.`f`) * 0.01 + SUM(`stat`.`c`) * 0.01";
}
getQuery()
$Find->getQuery();
Исходный код
/**
 * Builds the SQL query that selects pages matching the current request.
 *
 * Requires request() to have succeeded (qstem set); otherwise nothing is
 * returned. Depending on the "find-full-stem" configuration value (default
 * true) the query either filters `full` by a positive relevance value or
 * joins `full` with `stat` on the quoted stems. Rows are ordered by
 * relevance (descending) and then by title.
 *
 * @return string|null SQL text, or null when no request has been parsed.
 */
function getQuery() {
    if (!isset($this->qstem)) return;
    $useFullText = $this->Conf()->get("find-full-stem", true);
    $select = "SELECT `full`.*, " . $this->getIndex() . " AS `index`";
    $order = " ORDER BY `index` DESC, `full`.`title`";
    if ($useFullText) {
        return $select
            . " FROM `full`"
            . " WHERE `full`.`menu`='Y'"
            . " HAVING `index`>0"
            . $order;
    }
    return $select
        . " FROM `full` JOIN `stat` ON `full`.`id`=`stat`.`id`"
        . " WHERE `full`.`menu`='Y' AND `stat`.`word` IN ({$this->qstem})"
        . " GROUP BY `full`.`id`"
        . $order;
}
getWords()
$Find->getWords($full=null);
Исходный код
/**
 * Splits text into words: maximal runs of Unicode letters and digits.
 *
 * @param string|null $full Text to split; null is treated as an empty string.
 * @return array List of words in order of appearance; empty when the text
 *               has no words or is not valid UTF-8.
 */
function getWords($full = null) {
    // Cast so the null default is not passed to preg_split() as the subject
    $words = preg_split('~[^\p{L}\p{N}]~us', (string) $full, -1, PREG_SPLIT_NO_EMPTY);
    // preg_split() yields false on malformed UTF-8; callers expect an array
    return is_array($words) ? $words : array();
}
getStem()
$Find->getStem($full);
Исходный код
/**
 * Converts words or raw text into a list of stems.
 *
 * When $full is an array each element is stemmed with stemWord() and every
 * non-empty stem is kept. When $full is text it is first split into runs of
 * letters/digits and only stems of at least minWordLength characters are
 * kept (rule introduced 2013-04-22: words of at least three letters).
 *
 * @param array|string $full List of words or text to split.
 * @return array List of stems, duplicates preserved, in input order.
 */
function getStem($full) {
    $stem = array();
    if (is_array($full)) {
        foreach ($full as $word) {
            $st = $this->stemWord($word);
            if (strlen($st)) {
                $stem[] = $st;
            }
        }
        return $stem;
    }
    $words = preg_split('~[^\p{L}\p{N}]~us', $full, -1, PREG_SPLIT_NO_EMPTY);
    if (!$words) {
        // No words, or preg_split() returned false on malformed UTF-8
        return $stem;
    }
    foreach ($words as $word) {
        $st = $this->stemWord($word);
        if (mb_strlen($st) >= $this->minWordLength) {
            $stem[] = $st;
        }
    }
    return $stem;
}
getStat()
$Find->getStat(&$stem);
Исходный код
/**
 * Computes per-word statistics for a list of stems.
 *
 * For every distinct word the result holds:
 *   word - the word itself,
 *   c    - number of occurrences,
 *   p    - c divided by the total number of stems,
 *   f    - key of the first occurrence divided by the total,
 *   d    - variance of the relative positions (key / total) of the word.
 *
 * @param array $stem List of stems (received by reference, not modified).
 * @return array Map word => statistics; empty for an empty list.
 */
function getStat(&$stem) {
    $stat = array();
    if (!$stem) return $stat;
    $total = count($stem);
    if (!$total) return $stat;

    // First pass: occurrence counters and relative positions per word
    $positions = array();
    foreach ($stem as $index => $word) {
        if (!isset($stat[$word])) {
            $stat[$word] = array('word' => $word, 'c' => 0, 'p' => 0, 'f' => $index, 'd' => 0);
            $positions[$word] = array();
        }
        $stat[$word]['c'] ++;
        $stat[$word]['p'] = $stat[$word]['c'] / $total;
        $positions[$word][] = $index / $total;
    }

    // Second pass: variance of the positions and normalised first position
    foreach ($positions as $word => $points) {
        $n = count($points);
        $mean = array_sum($points) / $n;
        $squares = array();
        foreach ($points as $point) {
            $delta = $point - $mean;
            $squares[] = $delta * $delta;
        }
        $stat[$word]['d'] = array_sum($squares) / $n;
        $stat[$word]['f'] /= $total;
    }
    return $stat;
}
trimLeft()
$Find->trimLeft($text, $length);
Обрезка текста слева
Исходный код
/**
 * Trims text from the left, whole words at a time, until it fits.
 *
 * Text that already fits is returned untouched. Otherwise leading
 * non-word characters are dropped, then one leading word together with the
 * separators after it is removed per iteration until the remainder is no
 * longer than $length or nothing more can be removed.
 *
 * @param string $text   Text to shorten.
 * @param int    $length Maximum length in bytes (strlen), not characters.
 * @return string Shortened text.
 */
function trimLeft($text, $length) {
    if (strlen($text) <= $length) return $text;
    $r = preg_replace('/^[^\p{L}\p{N}]*/us', '', $text);
    while (strlen($r) > $length) {
        $previous = $r;
        $r = preg_replace('/^[\p{L}\p{N}]*[^\p{L}\p{N}]*/us', '', $r);
        // Strict check for "no progress": a loose == compares numeric strings
        // by value ("00" == "0.00") and would stop trimming too early
        if ($r === $previous) break;
    }
    return $r;
}
trimRight()
$Find->trimRight($text, $length);
Обрезка текста справа
Исходный код
/**
 * Trims text from the right until it fits.
 *
 * Text that already fits is returned untouched. Otherwise the trailing run
 * of whitespace/letters/digits is removed, then on each iteration a further
 * trailing run of whitespace/letters/digits plus the non-word characters
 * after it is removed, until the remainder is no longer than $length or
 * nothing more can be removed. Because whitespace belongs to the removed
 * run, text without punctuation is cut down to an empty string.
 *
 * @param string $text   Text to shorten.
 * @param int    $length Maximum length in bytes (strlen), not characters.
 * @return string Shortened text.
 */
function trimRight($text, $length) {
    if (strlen($text) <= $length) {
        return $text;
    }
    $cut = preg_replace('/[\s\p{L}\p{N}]*$/us', '', $text);
    for (;;) {
        if (strlen($cut) <= $length) {
            break;
        }
        $previous = $cut;
        $cut = preg_replace('/[\s\p{L}\p{N}]*[^\p{L}\p{N}]*$/us', '', $previous);
        if ($cut == $previous) {
            // Nothing was removed: stop to avoid an endless loop
            break;
        }
    }
    return $cut;
}
highlight()
$Find->highlight($text, $trim=0);
Исходный код
/**
 * Wraps words of the current search query in <b class="hi">...</b>.
 *
 * A word of the text matches when its stem (stemWord()) is one of the stems
 * of the query. When a text or line has no stem match, the literal query
 * words ($this->pwords) are highlighted case-insensitively instead.
 *
 * Without $trim the whole text is returned with the matches wrapped.
 * With $trim the text is processed line by line and an excerpt of at most
 * two lines is built: each line is cut around its matches to roughly $trim
 * bytes, cut places are marked with "...", and every excerpt line ends with
 * "<br>". When no line matches, the first two lines of the text (trimmed on
 * the right) are used. Consecutive identical excerpt lines are collapsed.
 *
 * All offsets and lengths are byte-based (strlen/substr), not characters.
 * The text is inserted into the result as is; no HTML escaping is done here.
 *
 * @param string   $text Text to process.
 * @param int|bool $trim Excerpt line length in bytes; true means 150,
 *                       0/false disables excerpt mode.
 * @return string Text or excerpt with highlighted matches.
 */
function highlight($text, $trim = 0) {
    if (!isset($this->search)) $this->request();
    $r = "";
    // $count: 1-based ordinal of the excerpt line being produced
    $count = 1;
    // $lines: maximum number of excerpt lines
    $lines = 2;
    if ($trim === true) $trim = 150;
    // No stems (request failed or not parsed): nothing to highlight
    if (empty($this->stem)) return $text;
    // Fallback pattern for literal query words. It is null when no query word
    // is long enough, because "~()~ui" would match at every position and fill
    // the text with empty <b> tags.
    $pattern = (isset($this->pwords) && strlen($this->pwords)) ? "~($this->pwords)~ui" : null;

    // $high = array( i => array( offset, length ), ... ) - list of matches
    if (!$trim) {
        $line = $text;
        $high = array();
        if (preg_match_all('/[\p{L}\p{N}]+/u', $line, $m, PREG_OFFSET_CAPTURE)) {
            foreach ($m[0] as $a) {
                list($word, $offs) = $a;
                $st = $this->stemWord($word);
                if (isset($this->rstem[$st])) {
                    $high[] = array($offs, strlen($word));
                }
            }
            if (!$high) {
                if ($pattern === null) return $line;
                return preg_replace($pattern, "<b class=\"hi\">\$1</b>", $line);
            }
            // Copy the text, wrapping every matched span
            $curs = 0;
            $r .= substr($line, 0, $high[0][0]);
            foreach ($high as $i => $a) {
                if ($i != 0) {
                    $r .= substr($line, $curs, $a[0] - $curs);
                }
                $r .= "<b class=\"hi\">" . substr($line, $a[0], $a[1]) . "</b>";
                $curs = $a[0] + $a[1];
            }
            $r .= substr($line, $curs);
            return $r;
        }
        return $text;
    }

    foreach (explode("\n", $text) as $line) {
        $line = trim($line);
        if (!strlen($line)) continue;
        $high = array();
        $off1 = -1; // start of the first match
        $off2 = -1; // end of the last match
        if (!preg_match_all('/[\p{L}\p{N}]+/u', $line, $m, PREG_OFFSET_CAPTURE)) continue;
        foreach ($m[0] as $a) {
            list($word, $offs) = $a;
            $st = $this->stemWord($word);
            if (isset($this->rstem[$st])) {
                $strl = strlen($word);
                $high[] = array($offs, $strl);
                if ($off1 < 0) $off1 = $offs;
                $off2 = $offs + $strl;
            }
        }
        if (!$high) {
            // No stem match on this line: try the literal query words
            if ($pattern !== null) {
                $line = $this->trimRight($line, $trim);
                $orig = $line;
                $line = preg_replace($pattern, "<b class=\"hi\">\$1</b>", $line);
                if ($line != $orig) {
                    $r .= $line;
                    $r .= "<br>";
                    if (($count ++) >= $lines) break;
                }
            }
            continue;
        }
        $curs = 0; // byte offset in $line up to which text has been copied
        $lens = 0; // bytes of $line copied into the excerpt so far
        if ($off2 > $trim) {
            // Matches reach beyond the budget: cut the beginning of the line
            $r .= "...";
            if ($off2 - $off1 < $trim) {
                $t = $this->trimLeft(substr($line, 0, $high[0][0]), $trim - $off2 + $off1);
                $r .= $t;
                $lens += strlen($t);
            }
        } else {
            $r .= substr($line, 0, $high[0][0]);
            $lens += $high[0][0];
        }
        foreach ($high as $i => $a) {
            if ($i != 0) {
                $r .= substr($line, $curs, $a[0] - $curs);
                $lens += $a[0] - $curs;
            }
            $r .= "<b class=\"hi\">" . substr($line, $a[0], $a[1]) . "</b>";
            $lens += $a[1];
            $curs = $a[0] + $a[1];
            if ($lens > $trim) {
                $r .= "...";
                break;
            }
        }
        if ($lens <= $trim) {
            // Budget left: append the tail of the line, trimmed if necessary
            if (strlen($line) - $curs > $trim - $lens) {
                $r .= $this->trimRight(substr($line, $curs), $trim - $lens);
                $r .= "...";
            } else {
                $r .= substr($line, $curs);
            }
        }
        $r .= "<br>";
        // Same limit check as in the literal-word branch above (was ">",
        // which let a third line through)
        if (($count ++) >= $lines) break;
    }
    if (!strlen($r)) {
        // Nothing matched: fall back to the first two lines of the text
        $x = explode("\n", $text, 3);
        if (isset($x[0])) $r .= $this->trimRight($x[0], $trim) . "<br>";
        if (isset($x[1])) $r .= $this->trimRight($x[1], $trim) . "<br>";
    }
    if (strlen($r) && $trim) {
        // Collapse consecutive identical excerpt lines
        $list = explode("<br>", $r);
        $done = false;
        $prev = null;
        foreach ($list as $i => $line) {
            if ($line === $prev) {
                unset($list[$i]);
                $done = true;
            }
            $prev = $line;
        }
        if ($done) $r = implode("<br>", $list);
    }
    return $r;
}
substitute()
$Find->substitute(&$s, $re, $to);
Исходный код
/**
 * Applies a regular-expression replacement to a string in place.
 *
 * @param string $s  Subject; overwritten with the result of preg_replace().
 * @param string $re Regular expression.
 * @param string $to Replacement.
 * @return bool True when the subject was changed, false when it stayed the same.
 */
function substitute(&$s, $re, $to) {
    $before = $s;
    $s = preg_replace($re, $to, $before);
    return !($before === $s);
}
stemWord()
$Find->stemWord($word);
Исходный код
/**
 * Returns the stem of a single word (Porter-style stemmer for Russian).
 *
 * - Words shorter than minWordLength bytes yield an empty string.
 * - Words containing at least one ASCII byte are only lower-cased.
 * - Remaining words shorter than 6 bytes yield an empty string (6 bytes is
 *   three two-byte characters, presumably three Cyrillic letters).
 * - Otherwise the word is lower-cased, "ё" is replaced by "е", and suffixes
 *   are stripped from the part of the word after its first vowel.
 *
 * Note: stems are compared with stems stored earlier, so a change in the
 * stemming rules presumably requires re-indexing (see reindex()) - confirm.
 *
 * @param string $word Single word in UTF-8.
 * @return string Stem, or an empty string for words that are too short.
 */
function stemWord($word) {
    $length = strlen($word);
    if ($length < $this->minWordLength) return "";
    // Any ASCII byte: the word is not stemmed, only lower-cased
    if (preg_match('/[\x00-\x7F]/', $word)) return mb_strtolower($word);
    if ($length < 6) return "";
    $word = str_replace('ё', 'е', mb_strtolower($word));
    // Without a vowel, or with nothing after the first vowel, the word is its own stem
    if (!preg_match($this->RVRE, $word, $p)) return $word;
    $start = $p[1];
    $RV = $p[2];
    if (!$RV) return $word;
    # Step 1
    if (!$this->substitute($RV, $this->PERFECTIVEGROUND, '')) {
        $this->substitute($RV, $this->REFLEXIVE, '');
        if ($this->substitute($RV, $this->ADJECTIVE, '')) {
            $this->substitute($RV, $this->PARTICIPLE, '');
        } elseif (!$this->substitute($RV, $this->VERB, '')) {
            $this->substitute($RV, $this->NOUN, '');
        }
    }
    # Step 2
    $this->substitute($RV, '/и$/u', '');
    # Step 3
    if (preg_match($this->DERIVATIONAL, $RV)) {
        $this->substitute($RV, '/ость?$/u', '');
    }
    # Step 4
    if (!$this->substitute($RV, '/ь$/u', '')) {
        // The superlative suffix is removed only at the end of the word; the
        // pattern used to lack the "$" anchor and cut "ейш" out of the middle
        $this->substitute($RV, '/ейше?$/u', '');
        $this->substitute($RV, '/нн$/u', 'н');
    }
    return $start . $RV;
}