Система управления «Сайт PRO»
Версия 20240107

Класс Find

Объект $Find: Cms\Root\Find наследует Cms\Site\Page

Объект для поиска по страницам

Исходный код
class Find extends \Cms\Site\Page { … }

Свойства

$minWordLength

$Find->minWordLength = 3;
Исходный код
    // Minimum word length. request() and getStem() (string input) compare it
    // against a character count (mb_strlen); stemWord() compares it against
    // the byte length of the word (strlen).
    var $minWordLength = 3;

$VOWEL

$Find->VOWEL = '/аеиоуыэюя/u';
Исходный код
    // Vowel pattern. Not referenced by any method shown for this class.
    // NOTE(review): there is no character class here, so the pattern matches
    // the literal nine-letter sequence rather than a single vowel; presumably
    // '[аеиоуыэюя]' was intended - confirm before using it anywhere.
    var $VOWEL = '/аеиоуыэюя/u';

$PERFECTIVEGROUND

$Find->PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$/u';
Исходный код
    // Perfective gerund endings, tried first in step 1 of stemWord().
    // The в/вши/вшись alternatives only match when preceded by а or я
    // (lookbehind); the preceding letter itself is not removed.
    var $PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$/u';

$REFLEXIVE

$Find->REFLEXIVE = '/(с[яь])$/u';
Исходный код
    // Reflexive endings (ся / сь); removed in step 1 of stemWord() when no
    // perfective gerund ending was found.
    var $REFLEXIVE = '/(с[яь])$/u';

$ADJECTIVE

$Find->ADJECTIVE = '/(ее|ие|ые|ое|ими|ыми|ей|ий|ый|ой|ем|им|ым|ом|его|ого|еых|ую|юю|ая|яя|ою|ею)$/u';
Исходный код
    // Adjective endings; tried in step 1 of stemWord() after the reflexive
    // ending has been removed.
    // NOTE(review): the alternative "еых" does not look like a real ending and
    // may be a corrupted merge of several alternatives (e.g. "их", "ых");
    // verify against the reference Russian stemmer list. Changing it alters
    // produced stems, so existing index data would need a reindex().
    var $ADJECTIVE = '/(ее|ие|ые|ое|ими|ыми|ей|ий|ый|ой|ем|им|ым|ом|его|ого|еых|ую|юю|ая|яя|ою|ею)$/u';

$PARTICIPLE

$Find->PARTICIPLE = '/((ивш|ывш|ующ)|((?<=[ая])(ем|нн|вш|ющ|щ)))$/u';
Исходный код
    // Participle endings; stemWord() applies this only after an adjective
    // ending has been removed. The second group requires a preceding а or я.
    var $PARTICIPLE = '/((ивш|ывш|ующ)|((?<=[ая])(ем|нн|вш|ющ|щ)))$/u';

$VERB

$Find->VERB = '/((ила|ыла|ена|ейте|уйте|ите|или|ыли|ей|уй|ил|ыл|им|ым|ены|ить|ыть|ишь|ую|ю)|((?<=[ая])(ла|на|ете|йте|ли|й|л|ем|н|ло|но|ет|ют|ны|ть|ешь|нно)))$/u';
Исходный код
    // Verb endings; stemWord() tries this when no adjective ending matched.
    // The second group requires a preceding а or я (lookbehind).
    var $VERB = '/((ила|ыла|ена|ейте|уйте|ите|или|ыли|ей|уй|ил|ыл|им|ым|ены|ить|ыть|ишь|ую|ю)|((?<=[ая])(ла|на|ете|йте|ли|й|л|ем|н|ло|но|ет|ют|ны|ть|ешь|нно)))$/u';

$NOUN

$Find->NOUN = '/(а|ев|ов|ие|ье|е|иями|ями|ами|еи|ии|и|ией|ей|ой|ий|й|и|ы|ь|ию|ью|ю|ия|ья|я)$/u';
Исходный код
    // Noun endings; stemWord() tries this only when neither an adjective nor
    // a verb ending matched.
    // NOTE(review): the alternative "и" appears twice in the list; the second
    // occurrence is redundant. Presumably harmless - confirm the list against
    // the reference stemmer before editing.
    var $NOUN = '/(а|ев|ов|ие|ье|е|иями|ями|ами|еи|ии|и|ией|ей|ой|ий|й|и|ы|ь|ию|ью|ю|ия|ья|я)$/u';

$RVRE

$Find->RVRE = '/^(.*?[аеиоуыэюя])(.*)$/u';
Исходный код
    // Splits a word for stemWord(): group 1 is the shortest prefix ending in
    // the first vowel, group 2 is the remainder (the part the stemming steps
    // operate on). Does not match a word that contains no vowel.
    var $RVRE = '/^(.*?[аеиоуыэюя])(.*)$/u';

$DERIVATIONAL

$Find->DERIVATIONAL = '/[^аеиоуыэюя][аеиоуыэюя]+[^аеиоуыэюя]+[аеиоуыэюя].*(?<=о)сть?$/u';
Исходный код
    // Condition checked in step 3 of stemWord(): when the remainder matches
    // (consonant, vowel(s), consonant(s), vowel, ..., ending in "ост"/"ость"),
    // the trailing "ост"/"ость" is removed.
    var $DERIVATIONAL = '/[^аеиоуыэюя][аеиоуыэюя]+[^аеиоуыэюя]+[аеиоуыэюя].*(?<=о)сть?$/u';

Методы

register()

$Find->register(&$Main);
Исходный код
    /**
     * Registers the search page with the main object.
     *
     * Calls $Main->addDefault() with the key "search" and a descriptor of
     * type "find", then returns the descriptor of this page type.
     *
     * @param object $Main object providing addDefault(); taken by reference
     * @return array array with a single "title" entry
     */
    function register(&$Main) {
        $title = "Поиск";
        $defaults = array(
            "type" => "find",
            "title" => $title,
        );
        $Main->addDefault("search", $defaults);
        return array("title" => $title);
    }

reindex()

$Find->reindex();
Исходный код
    /**
     * Re-saves every stored row of every table.
     *
     * For each table listed in $Main->Storage()->tables all rows are
     * selected, loaded through $Main->load(), opened with openAll() and
     * written back with saveFull(). When saveFull() reports failure the
     * item's error text is printed and processing continues.
     */
    function reindex() {
        $Data = $this->Data();
        $Main = $this->Main();
        foreach ($Main->Storage()->tables as $table => $fields) {
            $result = $Data->query("SELECT * FROM `$table`");
            if (!$result) {
                // Query failed or returned nothing usable: skip this table.
                continue;
            }
            while ($row = $Data->fetch($result)) {
                $Item = $Main->load($row);
                $Item->openAll();
                if ($Item->saveFull()) {
                    continue;
                }
                print $Item->error;
            }
        }
    }

request()

$Find->request($s=null);
Исходный код
    /**
     * Parses a search string and prepares the search state on this object.
     *
     * On success the following properties are set: qsearch (quoted search
     * string), search, pwords (regex alternation of the quoted words that are
     * at least minWordLength characters long), words, qstem (comma-separated
     * quoted stems), rstem (stem => stem lookup map) and stem.
     * On failure $this->error receives a user-facing message.
     *
     * @param string|null $s search string; when null it is taken from $_GET['s']
     * @return bool true when at least one stem was obtained, false otherwise
     */
    function request($s = null) {
        if (!isset($s)) {
            // A missing key would raise a warning and an array value
            // (?s[]=x) would be stringified to "Array"; treat both as an
            // empty query instead.
            $s = (isset($_GET['s']) && is_scalar($_GET['s'])) ? strval($_GET['s']) : "";
        }
        $search = trim($s);
        if (!strlen($search)) {
            $this->error = "Пожалуйста, введите строку для поиска";
            return false;
        }
        $words = $this->getWords($search);
        $stem = $words ? $this->getStem($words) : array();
        if (!$words || !$stem) {
            $this->error = "Пожалуйста, уточните Ваш поисковый запрос";
            return false;
        }
        $Data = $this->Data();
        $pwords = array();
        $qstem = array();
        $rstem = array();
        foreach ($words as $word) {
            // Only sufficiently long words take part in the literal fallback pattern.
            if (mb_strlen($word) >= $this->minWordLength) {
                $pwords[] = preg_quote($word, "~");
            }
        }
        foreach ($stem as $st) {
            $qstem[] = $Data->quote($st);
            $rstem[$st] = $st;
        }
        $this->qsearch = $Data->quote($search);
        $this->search = $search;
        $this->pwords = implode("|", $pwords);
        $this->words = $words;
        $this->qstem = implode(",", $qstem);
        $this->rstem = $rstem;
        $this->stem = $stem;
        return true;
    }

getIndex()

$Find->getIndex();
Исходный код
    /**
     * Returns the SQL expression used as the relevance value of a result row.
     *
     * When the "find-full-stem" configuration value is truthy (it defaults to
     * true) a MATCH ... AGAINST expression over `full`.`full` is built from
     * the current stems; otherwise an aggregate expression over the `stat`
     * table columns is returned.
     *
     * @return string SQL expression
     */
    function getIndex() {
        $useFullText = $this->Conf()->get("find-full-stem", true);
        if ($useFullText) {
            $Data = $this->Data();
            $needle = implode(" ", $this->stem);
            return "MATCH (`full`.`full`) AGAINST (" . $Data->quote($needle) . ")";
        }
        return "SUM(`stat`.`c`) / MAX(`stat`.`c`/`stat`.`p`) + AVG(`stat`.`d`) * 0.1 + AVG(`stat`.`f`) * 0.01 + SUM(`stat`.`c`) * 0.01";
    }

getQuery()

$Find->getQuery();
Исходный код
    /**
     * Builds the SELECT statement for the current search.
     *
     * Returns nothing (null) when no search has been prepared, i.e. when
     * $this->qstem is not set. Depending on the "find-full-stem"
     * configuration value the query either relies on the expression from
     * getIndex() alone or joins the `stat` table filtered by the quoted stems.
     *
     * @return string|null SQL text, or null when qstem is not set
     */
    function getQuery() {
        if (!isset($this->qstem)) {
            return;
        }
        $useFullText = $this->Conf()->get("find-full-stem", true);
        $select = "SELECT `full`.*, " . $this->getIndex() . " AS `index`";
        $order = " ORDER BY `index` DESC, `full`.`title`";
        if ($useFullText) {
            return $select
                . " FROM `full`"
                . " WHERE `full`.`menu`='Y'"
                . " HAVING `index`>0"
                . $order;
        }
        return $select
            . " FROM `full` JOIN `stat` ON `full`.`id`=`stat`.`id`"
            . " WHERE `full`.`menu`='Y' AND `stat`.`word` IN ({$this->qstem})"
            . " GROUP BY `full`.`id`"
            . $order;
    }

getWords()

$Find->getWords($full=null);
Исходный код
    /**
     * Splits text into words.
     *
     * Every character that is neither a Unicode letter nor a Unicode number
     * acts as a separator; empty pieces are dropped.
     *
     * @param string|null $full text to split
     * @return array|false list of words (whatever preg_split() returns)
     */
    function getWords($full = null) {
        $separator = '~[^\p{L}\p{N}]~us';
        $words = preg_split($separator, $full, -1, PREG_SPLIT_NO_EMPTY);
        return $words;
    }

getStem()

$Find->getStem($full);
Исходный код
    /**
     * Converts words or raw text into a list of stems.
     *
     * An array argument is treated as a list of words: every non-empty result
     * of stemWord() is kept. Any other argument is treated as text: it is
     * split on non-letter/non-number characters and only stems of at least
     * minWordLength characters are kept (rule dated 2013-04-22 in the
     * original source).
     *
     * @param array|string $full list of words or a text
     * @return array list of stems, in input order, duplicates preserved
     */
    function getStem($full) {
        $stems = array();
        if (!is_array($full)) {
            $tokens = preg_split('~[^\p{L}\p{N}]~us', $full, -1, PREG_SPLIT_NO_EMPTY);
            foreach ($tokens as $token) {
                $candidate = $this->stemWord($token);
                // Text input: drop stems shorter than the minimum word length.
                if (mb_strlen($candidate) < $this->minWordLength) {
                    continue;
                }
                $stems[] = $candidate;
            }
            return $stems;
        }
        foreach ($full as $token) {
            $candidate = $this->stemWord($token);
            if (strlen($candidate)) {
                $stems[] = $candidate;
            }
        }
        return $stems;
    }

getStat()

$Find->getStat(&$stem);
Исходный код
    /**
     * Computes per-stem statistics for a list of stems.
     *
     * For every distinct stem the result holds:
     *   word - the stem itself
     *   c    - number of occurrences
     *   p    - c divided by the total number of stems
     *   f    - index of the first occurrence divided by the total
     *   d    - variance of the relative positions (index / total)
     *
     * @param array $stem list of stems (taken by reference, not modified)
     * @return array map stem => statistics; empty when $stem is empty
     */
    function getStat(&$stem) {
        $stat = array();
        if (!$stem) {
            return $stat;
        }
        $count = count($stem);
        if (!$count) {
            return $stat;
        }
        // Pass 1: occurrences, share and relative positions of each stem.
        foreach ($stem as $pos => $word) {
            if (!isset($stat[$word])) {
                $stat[$word] = array(
                    'word' => $word,
                    'c' => 0,
                    'p' => 0,
                    'f' => $pos,
                    'd' => 0,
                    'x' => array(),
                );
            }
            $stat[$word]['c'] += 1;
            $stat[$word]['p'] = $stat[$word]['c'] / $count;
            $stat[$word]['x'][] = $pos / $count;
        }
        // Pass 2: replace the position list with its variance, normalise 'f'.
        foreach (array_keys($stat) as $word) {
            $positions = $stat[$word]['x'];
            unset($stat[$word]['x']);
            $n = count($positions);
            $mean = array_sum($positions) / $n;
            $squares = array();
            foreach ($positions as $x) {
                $squares[] = ($x - $mean) * ($x - $mean);
            }
            $stat[$word]['d'] = array_sum($squares) / $n;
            $stat[$word]['f'] /= $count;
        }
        return $stat;
    }

trimLeft()

$Find->trimLeft($text, $length);

Обрезка текста слева

Исходный код
    /**
     * Trims text from the left, dropping whole words.
     *
     * Text that already fits is returned unchanged. Otherwise leading
     * non-word characters are removed and then, while the remainder is still
     * longer than $length, one leading word together with the separators
     * following it is removed. Lengths are measured in bytes (strlen).
     *
     * @param string $text   text to shorten
     * @param int    $length maximum length in bytes
     * @return string the remaining tail of the text (possibly empty)
     */
    function trimLeft($text, $length) {
        if (strlen($text) <= $length) {
            return $text;
        }
        $rest = preg_replace('/^[^\p{L}\p{N}]*/us', '', $text);
        for (;;) {
            if (strlen($rest) <= $length) {
                break;
            }
            $before = $rest;
            $rest = preg_replace('/^[\p{L}\p{N}]*[^\p{L}\p{N}]*/us', '', $before);
            // Nothing was removed: stop instead of looping forever.
            if ($rest == $before) {
                break;
            }
        }
        return $rest;
    }

trimRight()

$Find->trimRight($text, $length);

Обрезка текста справа

Исходный код
    /**
     * Trims text from the right.
     *
     * Text that already fits is returned unchanged. Otherwise the trailing
     * run of letters, numbers and whitespace is removed; then, while the
     * remainder is still longer than $length, trailing non-word characters
     * plus the run of letters/numbers/whitespace before them are removed.
     * Because whitespace belongs to the removed run, text is cut back to the
     * previous punctuation mark, so text without punctuation can be trimmed
     * to an empty string. Lengths are measured in bytes (strlen).
     *
     * @param string $text   text to shorten
     * @param int    $length maximum length in bytes
     * @return string the remaining head of the text (possibly empty)
     */
    function trimRight($text, $length) {
        if (strlen($text) <= $length) {
            return $text;
        }
        $rest = preg_replace('/[\s\p{L}\p{N}]*$/us', '', $text);
        for (;;) {
            if (strlen($rest) <= $length) {
                break;
            }
            $before = $rest;
            $rest = preg_replace('/[\s\p{L}\p{N}]*[^\p{L}\p{N}]*$/us', '', $before);
            // Nothing was removed: stop instead of looping forever.
            if ($rest == $before) {
                break;
            }
        }
        return $rest;
    }

highlight()

$Find->highlight($text, $trim=0);
Исходный код
    /**
     * Wraps search matches in <b class="hi">...</b>.
     *
     * A word matches when its stem (stemWord()) is one of the stems of the
     * current search ($this->rstem). When no word of a text/line matches by
     * stem, the literal query words ($this->pwords) are highlighted instead.
     *
     * Without $trim the whole text is returned with matches wrapped.
     * With $trim the text is processed line by line and a snippet of at most
     * two lines is built; each line is shortened around its matches ("..."
     * marks cut parts) and terminated with "<br>". When no line produced
     * output, the first two lines of the text, trimmed on the right, are
     * used. Consecutive identical snippet lines are collapsed.
     *
     * Offsets and lengths are byte-based (PREG_OFFSET_CAPTURE, strlen,
     * substr); cuts are made through trimLeft()/trimRight().
     *
     * @param string   $text text to highlight
     * @param int|bool $trim 0/false - no trimming; true - 150; otherwise the
     *                       approximate maximum line length in bytes
     * @return string highlighted text or snippet
     */
    function highlight($text, $trim = 0) {
        if (!isset($this->search)) $this->request();
        $r = "";
        // $count - 1-based number of the snippet line being emitted
        $count = 1;
        // $lines - maximum number of snippet lines
        $lines = 2;
        if ($trim === true) $trim = 150;
        // empty() rather than a bare read: the property does not exist when
        // request() failed.
        if (empty($this->stem)) return $text;
        // An empty alternation would make the fallback pattern match the
        // empty string at every position and flood the text with empty tags.
        $hasWords = isset($this->pwords) && $this->pwords !== "";
        // $high = array( i => array( offset, length ), ... ) - list of matches
        if (!$trim) {
            $line = $text;
            $high = array();
            if (preg_match_all('/[\p{L}\p{N}]+/u', $line, $m, PREG_OFFSET_CAPTURE)) {
                foreach ($m[0] as $a) {
                    list($word, $offs) = $a;
                    $st = $this->stemWord($word);
                    if (isset($this->rstem[$st])) {
                        $high[] = array($offs, strlen($word));
                    }
                }
                if (!$high) {
                    // No stem match: fall back to the literal query words.
                    if (!$hasWords) return $line;
                    return preg_replace("~($this->pwords)~ui", "<b class=\"hi\">\$1</b>", $line);
                }
                $curs = 0;
                foreach ($high as $a) {
                    // Text between the previous match and this one, then the match.
                    $r .= substr($line, $curs, $a[0] - $curs);
                    $r .= "<b class=\"hi\">" . substr($line, $a[0], $a[1]) . "</b>";
                    $curs = $a[0] + $a[1];
                }
                $r .= substr($line, $curs);
                return $r;
            }
            return $text;
        }
        foreach (explode("\n", $text) as $line) {
            $line = trim($line);
            if (!strlen($line)) continue;
            if (!preg_match_all('/[\p{L}\p{N}]+/u', $line, $m, PREG_OFFSET_CAPTURE)) continue;
            $high = array();
            $off1 = -1; // start of the first match
            $off2 = -1; // end of the last match
            foreach ($m[0] as $a) {
                list($word, $offs) = $a;
                $st = $this->stemWord($word);
                if (isset($this->rstem[$st])) {
                    $strl = strlen($word);
                    $high[] = array($offs, $strl);
                    if ($off1 < 0) $off1 = $offs;
                    $off2 = $offs + $strl;
                }
            }
            if (!$high) {
                // No stem match in this line: try the literal query words on
                // the right-trimmed line and keep it only if something matched.
                $line = $this->trimRight($line, $trim);
                $orig = $line;
                if ($hasWords) {
                    $line = preg_replace("~($this->pwords)~ui", "<b class=\"hi\">\$1</b>", $line);
                }
                if ($line != $orig) {
                    $r .= $line;
                    $r .= "<br>";
                    if (($count ++) >= $lines) break;
                }
                continue;
            }
            $curs = 0;
            // $lens - number of bytes of the line emitted so far
            $lens = 0;
            if ($off2 > $trim) {
                // The matches extend beyond the limit: cut the beginning.
                $r .= "...";
                if ($off2 - $off1 < $trim) {
                    // All matches fit: spend the spare room on the text
                    // preceding the first match.
                    $t = $this->trimLeft(substr($line, 0, $high[0][0]), $trim - $off2 + $off1);
                    $r .= $t;
                    $lens += strlen($t);
                }
            } else {
                $r .= substr($line, 0, $high[0][0]);
                $lens += $high[0][0];
            }
            foreach ($high as $i => $a) {
                if ($i != 0) {
                    $r .= substr($line, $curs, $a[0] - $curs);
                    $lens += $a[0] - $curs;
                }
                $r .= "<b class=\"hi\">" . substr($line, $a[0], $a[1]) . "</b>";
                $lens += $a[1];
                $curs = $a[0] + $a[1];
                if ($lens > $trim) {
                    $r .= "...";
                    break;
                }
            }
            if ($lens <= $trim) {
                // Room left: append the tail, cut on the right when needed.
                if (strlen($line) - $curs > $trim - $lens) {
                    $r .= $this->trimRight(substr($line, $curs), $trim - $lens);
                    $r .= "...";
                } else {
                    $r .= substr($line, $curs);
                }
            }
            $r .= "<br>";
            // Same limit check as in the fallback branch above; the former
            // ">" comparison let a third line through although $lines is 2.
            if (($count ++) >= $lines) break;
        }
        if (!strlen($r)) {
            // Nothing matched anywhere: show the beginning of the text.
            $x = explode("\n", $text, 3);
            if (isset($x[0])) $r .= $this->trimRight($x[0], $trim) . "<br>";
            if (isset($x[1])) $r .= $this->trimRight($x[1], $trim) . "<br>";
        }
        if (strlen($r) && $trim) {
            // Collapse consecutive identical snippet lines.
            $list = explode("<br>", $r);
            $done = false;
            $prev = null;
            foreach ($list as $i => $line) {
                if ($line === $prev) {
                    unset($list[$i]);
                    $done = true;
                }
                $prev = $line;
            }
            if ($done) $r = implode("<br>", $list);
        }
        return $r;
    }

substitute()

$Find->substitute(&$s, $re, $to);
Исходный код
    /**
     * Applies a regex replacement to a string in place.
     *
     * @param string $s  subject; overwritten with the result of preg_replace()
     * @param string $re regular expression
     * @param string $to replacement
     * @return bool true when the value of $s changed, false otherwise
     */
    function substitute(&$s, $re, $to) {
        $before = $s;
        $after = preg_replace($re, $to, $before);
        $s = $after;
        return $before !== $after;
    }

stemWord()

$Find->stemWord($word);
Исходный код
    /**
     * Returns the stem of a single word.
     *
     * - Words shorter than minWordLength bytes yield "".
     * - Words containing at least one ASCII byte are only lower-cased.
     * - Remaining words shorter than 6 bytes yield "".
     * - Otherwise the word is lower-cased, "ё" is replaced by "е" and the
     *   part after the first vowel (RV) goes through the suffix-stripping
     *   steps below; a word without a vowel, or with nothing after the first
     *   vowel, is returned lower-cased as is.
     *
     * Note: the stems produced here are what getStem() feeds into the search
     * index and the query, so any change to the rules requires reindex().
     *
     * @param string $word word to stem
     * @return string stem, or "" when the word is too short
     */
    function stemWord($word) {
        $length = strlen($word);
        if ($length < $this->minWordLength) return "";
        // Any ASCII byte (Latin letter, digit, ...): no stemming, lower-case only.
        if (preg_match('/[\x00-\x7F]/', $word)) return mb_strtolower($word);
        // Byte-length threshold for words made only of multi-byte characters.
        // NOTE(review): presumably 3 two-byte (Cyrillic) letters - confirm.
        if ($length < 6) return "";
        $word = str_replace('ё', 'е', mb_strtolower($word));
        if (!preg_match($this->RVRE, $word, $p)) return $word;
        $start = $p[1];
        $RV = $p[2];
        if (!$RV) return $word;
        # Step 1: perfective gerund, otherwise reflexive + adjective(+participle) / verb / noun
        if (!$this->substitute($RV, $this->PERFECTIVEGROUND, '')) {
            $this->substitute($RV, $this->REFLEXIVE, '');
            if ($this->substitute($RV, $this->ADJECTIVE, '')) {
                $this->substitute($RV, $this->PARTICIPLE, '');
            } elseif (!$this->substitute($RV, $this->VERB, '')) {
                $this->substitute($RV, $this->NOUN, '');
            }
        }
        # Step 2: trailing "и"
        $this->substitute($RV, '/и$/u', '');
        # Step 3: derivational ending "ост" / "ость"
        if (preg_match($this->DERIVATIONAL, $RV)) {
            $this->substitute($RV, '/ость?$/u', '');
        }
        # Step 4: trailing "ь", otherwise superlative ending and doubled "н"
        if (!$this->substitute($RV, '/ь$/u', '')) {
            // Anchored to the end: the superlative ending is only an ending;
            // unanchored, the pattern removed "ейш" from the middle of RV.
            $this->substitute($RV, '/ейше?$/u', '');
            $this->substitute($RV, '/нн$/u', 'н');
        }
        return $start . $RV;
    }