[ Index ] |
PHP Cross Reference of phpBB-3.1.12-deutsch |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * 4 * This file is part of the phpBB Forum Software package. 5 * 6 * @copyright (c) phpBB Limited <https://www.phpbb.com> 7 * @license GNU General Public License, version 2 (GPL-2.0) 8 * 9 * For full copyright and license information, please see 10 * the docs/CREDITS.txt file. 11 * 12 */ 13 14 namespace phpbb\search; 15 16 /** 17 * phpBB's own db driven fulltext search, version 2 18 */ 19 class fulltext_native extends \phpbb\search\base 20 { 21 /** 22 * Associative array holding index stats 23 * @var array 24 */ 25 protected $stats = array(); 26 27 /** 28 * Associative array stores the min and max word length to be searched 29 * @var array 30 */ 31 protected $word_length = array(); 32 33 /** 34 * Contains tidied search query. 35 * Operators are prefixed in search query and common words excluded 36 * @var string 37 */ 38 protected $search_query; 39 40 /** 41 * Contains common words. 42 * Common words are words with length less/more than min/max length 43 * @var array 44 */ 45 protected $common_words = array(); 46 47 /** 48 * Post ids of posts containing words that are to be included 49 * @var array 50 */ 51 protected $must_contain_ids = array(); 52 53 /** 54 * Post ids of posts containing words that should not be included 55 * @var array 56 */ 57 protected $must_not_contain_ids = array(); 58 59 /** 60 * Post ids of posts containing at least one word that needs to be excluded 61 * @var array 62 */ 63 protected $must_exclude_one_ids = array(); 64 65 /** 66 * Relative path to board root 67 * @var string 68 */ 69 protected $phpbb_root_path; 70 71 /** 72 * PHP Extension 73 * @var string 74 */ 75 protected $php_ext; 76 77 /** 78 * Config object 79 * @var \phpbb\config\config 80 */ 81 protected $config; 82 83 /** 84 * Database connection 85 * @var \phpbb\db\driver\driver_interface 86 */ 87 protected $db; 88 89 /** 90 * phpBB event dispatcher object 91 * @var \phpbb\event\dispatcher_interface 92 */ 93 protected $phpbb_dispatcher; 94 95 /** 96 * User object 
97 * @var \phpbb\user 98 */ 99 protected $user; 100 101 /** 102 * Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded 103 * 104 * @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure 105 * @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object 106 */ 107 public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user, $phpbb_dispatcher) 108 { 109 $this->phpbb_root_path = $phpbb_root_path; 110 $this->php_ext = $phpEx; 111 $this->config = $config; 112 $this->db = $db; 113 $this->phpbb_dispatcher = $phpbb_dispatcher; 114 $this->user = $user; 115 116 $this->word_length = array('min' => $this->config['fulltext_native_min_chars'], 'max' => $this->config['fulltext_native_max_chars']); 117 118 /** 119 * Load the UTF tools 120 */ 121 if (!class_exists('utf_normalizer')) 122 { 123 include($this->phpbb_root_path . 'includes/utf/utf_normalizer.' . $this->php_ext); 124 } 125 if (!function_exists('utf8_decode_ncr')) 126 { 127 include($this->phpbb_root_path . 'includes/utf/utf_tools.' . 
$this->php_ext); 128 } 129 130 $error = false; 131 } 132 133 /** 134 * Returns the name of this search backend to be displayed to administrators 135 * 136 * @return string Name 137 */ 138 public function get_name() 139 { 140 return 'phpBB Native Fulltext'; 141 } 142 143 /** 144 * Returns the search_query 145 * 146 * @return string search query 147 */ 148 public function get_search_query() 149 { 150 return $this->search_query; 151 } 152 153 /** 154 * Returns the common_words array 155 * 156 * @return array common words that are ignored by search backend 157 */ 158 public function get_common_words() 159 { 160 return $this->common_words; 161 } 162 163 /** 164 * Returns the word_length array 165 * 166 * @return array min and max word length for searching 167 */ 168 public function get_word_length() 169 { 170 return $this->word_length; 171 } 172 173 /** 174 * This function fills $this->search_query with the cleaned user search query 175 * 176 * If $terms is 'any' then the words will be extracted from the search query 177 * and combined with | inside brackets. They will afterwards be treated like 178 * an standard search query. 
179 * 180 * Then it analyses the query and fills the internal arrays $must_not_contain_ids, 181 * $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search() 182 * 183 * @param string $keywords contains the search query string as entered by the user 184 * @param string $terms is either 'all' (use search query as entered, default words to 'must be contained in post') 185 * or 'any' (find all posts containing at least one of the given words) 186 * @return boolean false if no valid keywords were found and otherwise true 187 */ 188 public function split_keywords($keywords, $terms) 189 { 190 $tokens = '+-|()*'; 191 192 $keywords = trim($this->cleanup($keywords, $tokens)); 193 194 // allow word|word|word without brackets 195 if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false)) 196 { 197 $keywords = '(' . $keywords . ')'; 198 } 199 200 $open_bracket = $space = false; 201 for ($i = 0, $n = strlen($keywords); $i < $n; $i++) 202 { 203 if ($open_bracket !== false) 204 { 205 switch ($keywords[$i]) 206 { 207 case ')': 208 if ($open_bracket + 1 == $i) 209 { 210 $keywords[$i - 1] = '|'; 211 $keywords[$i] = '|'; 212 } 213 $open_bracket = false; 214 break; 215 case '(': 216 $keywords[$i] = '|'; 217 break; 218 case '+': 219 case '-': 220 case ' ': 221 $keywords[$i] = '|'; 222 break; 223 case '*': 224 if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0)) 225 { 226 if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0)) 227 { 228 $keywords = substr($keywords, 0, $i) . 
substr($keywords, $i + 1); 229 } 230 } 231 break; 232 } 233 } 234 else 235 { 236 switch ($keywords[$i]) 237 { 238 case ')': 239 $keywords[$i] = ' '; 240 break; 241 case '(': 242 $open_bracket = $i; 243 $space = false; 244 break; 245 case '|': 246 $keywords[$i] = ' '; 247 break; 248 case '-': 249 case '+': 250 $space = $keywords[$i]; 251 break; 252 case ' ': 253 if ($space !== false) 254 { 255 $keywords[$i] = $space; 256 } 257 break; 258 default: 259 $space = false; 260 } 261 } 262 } 263 264 if ($open_bracket) 265 { 266 $keywords .= ')'; 267 } 268 269 $match = array( 270 '# +#', 271 '#\|\|+#', 272 '#(\+|\-)(?:\+|\-)+#', 273 '#\(\|#', 274 '#\|\)#', 275 ); 276 $replace = array( 277 ' ', 278 '|', 279 '$1', 280 '(', 281 ')', 282 ); 283 284 $keywords = preg_replace($match, $replace, $keywords); 285 $num_keywords = sizeof(explode(' ', $keywords)); 286 287 // We limit the number of allowed keywords to minimize load on the database 288 if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords']) 289 { 290 trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords)); 291 } 292 293 // $keywords input format: each word separated by a space, words in a bracket are not separated 294 295 // the user wants to search for any word, convert the search query 296 if ($terms == 'any') 297 { 298 $words = array(); 299 300 preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words); 301 if (sizeof($words[1])) 302 { 303 $keywords = '(' . implode('|', $words[1]) . 
')'; 304 } 305 } 306 307 // set the search_query which is shown to the user 308 $this->search_query = $keywords; 309 310 $exact_words = array(); 311 preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words); 312 $exact_words = $exact_words[1]; 313 314 $common_ids = $words = array(); 315 316 if (sizeof($exact_words)) 317 { 318 $sql = 'SELECT word_id, word_text, word_common 319 FROM ' . SEARCH_WORDLIST_TABLE . ' 320 WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . ' 321 ORDER BY word_count ASC'; 322 $result = $this->db->sql_query($sql); 323 324 // store an array of words and ids, remove common words 325 while ($row = $this->db->sql_fetchrow($result)) 326 { 327 if ($row['word_common']) 328 { 329 $this->common_words[] = $row['word_text']; 330 $common_ids[$row['word_text']] = (int) $row['word_id']; 331 continue; 332 } 333 334 $words[$row['word_text']] = (int) $row['word_id']; 335 } 336 $this->db->sql_freeresult($result); 337 } 338 339 // Handle +, - without preceeding whitespace character 340 $match = array('#(\S)\+#', '#(\S)-#'); 341 $replace = array('$1 +', '$1 +'); 342 343 $keywords = preg_replace($match, $replace, $keywords); 344 345 // now analyse the search query, first split it using the spaces 346 $query = explode(' ', $keywords); 347 348 $this->must_contain_ids = array(); 349 $this->must_not_contain_ids = array(); 350 $this->must_exclude_one_ids = array(); 351 352 $mode = ''; 353 $ignore_no_id = true; 354 355 foreach ($query as $word) 356 { 357 if (empty($word)) 358 { 359 continue; 360 } 361 362 // words which should not be included 363 if ($word[0] == '-') 364 { 365 $word = substr($word, 1); 366 367 // a group of which at least one may not be in the resulting posts 368 if ($word[0] == '(') 369 { 370 $word = array_unique(explode('|', substr($word, 1, -1))); 371 $mode = 'must_exclude_one'; 372 } 373 // one word which should not be in the resulting posts 374 else 375 { 376 $mode = 'must_not_contain'; 377 } 378 $ignore_no_id = 
true; 379 } 380 // words which have to be included 381 else 382 { 383 // no prefix is the same as a +prefix 384 if ($word[0] == '+') 385 { 386 $word = substr($word, 1); 387 } 388 389 // a group of words of which at least one word should be in every resulting post 390 if ($word[0] == '(') 391 { 392 $word = array_unique(explode('|', substr($word, 1, -1))); 393 } 394 $ignore_no_id = false; 395 $mode = 'must_contain'; 396 } 397 398 if (empty($word)) 399 { 400 continue; 401 } 402 403 // if this is an array of words then retrieve an id for each 404 if (is_array($word)) 405 { 406 $non_common_words = array(); 407 $id_words = array(); 408 foreach ($word as $i => $word_part) 409 { 410 if (strpos($word_part, '*') !== false) 411 { 412 $id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\''; 413 $non_common_words[] = $word_part; 414 } 415 else if (isset($words[$word_part])) 416 { 417 $id_words[] = $words[$word_part]; 418 $non_common_words[] = $word_part; 419 } 420 else 421 { 422 $len = utf8_strlen($word_part); 423 if ($len < $this->word_length['min'] || $len > $this->word_length['max']) 424 { 425 $this->common_words[] = $word_part; 426 } 427 } 428 } 429 if (sizeof($id_words)) 430 { 431 sort($id_words); 432 if (sizeof($id_words) > 1) 433 { 434 $this->{$mode . '_ids'}[] = $id_words; 435 } 436 else 437 { 438 $mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode; 439 $this->{$mode . 
'_ids'}[] = $id_words[0]; 440 } 441 } 442 // throw an error if we shall not ignore unexistant words 443 else if (!$ignore_no_id && sizeof($non_common_words)) 444 { 445 trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words))); 446 } 447 unset($non_common_words); 448 } 449 // else we only need one id 450 else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word])) 451 { 452 if ($wildcard) 453 { 454 $len = utf8_strlen(str_replace('*', '', $word)); 455 if ($len >= $this->word_length['min'] && $len <= $this->word_length['max']) 456 { 457 $this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\''; 458 } 459 else 460 { 461 $this->common_words[] = $word; 462 } 463 } 464 else 465 { 466 $this->{$mode . '_ids'}[] = $words[$word]; 467 } 468 } 469 else 470 { 471 if (!isset($common_ids[$word])) 472 { 473 $len = utf8_strlen($word); 474 if ($len < $this->word_length['min'] || $len > $this->word_length['max']) 475 { 476 $this->common_words[] = $word; 477 } 478 } 479 } 480 } 481 482 // Return true if all words are not common words 483 if (sizeof($exact_words) - sizeof($this->common_words) > 0) 484 { 485 return true; 486 } 487 return false; 488 } 489 490 /** 491 * Performs a search on keywords depending on display specific params. 
You have to run split_keywords() first 492 * 493 * @param string $type contains either posts or topics depending on what should be searched for 494 * @param string $fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched) 495 * @param string $terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words) 496 * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query 497 * @param string $sort_key is the key of $sort_by_sql for the selected sorting 498 * @param string $sort_dir is either a or d representing ASC and DESC 499 * @param string $sort_days specifies the maximum amount of days a post may be old 500 * @param array $ex_fid_ary specifies an array of forum ids which should not be searched 501 * @param string $post_visibility specifies which types of posts the user can view in which forums 502 * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched 503 * @param array $author_ary an array of author ids if the author should be ignored during the search the array is empty 504 * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match 505 * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered 506 * @param int $start indicates the first index of the page 507 * @param int $per_page number of ids each page is supposed to contain 508 * @return boolean|int total number of results 509 */ 510 public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, 
$author_name, &$id_ary, &$start, $per_page) 511 { 512 // No keywords? No posts. 513 if (empty($this->search_query)) 514 { 515 return false; 516 } 517 518 // we can't search for negatives only 519 if (empty($this->must_contain_ids)) 520 { 521 return false; 522 } 523 524 $must_contain_ids = $this->must_contain_ids; 525 $must_not_contain_ids = $this->must_not_contain_ids; 526 $must_exclude_one_ids = $this->must_exclude_one_ids; 527 528 sort($must_contain_ids); 529 sort($must_not_contain_ids); 530 sort($must_exclude_one_ids); 531 532 // generate a search_key from all the options to identify the results 533 $search_key_array = array( 534 serialize($must_contain_ids), 535 serialize($must_not_contain_ids), 536 serialize($must_exclude_one_ids), 537 $type, 538 $fields, 539 $terms, 540 $sort_days, 541 $sort_key, 542 $topic_id, 543 implode(',', $ex_fid_ary), 544 $post_visibility, 545 implode(',', $author_ary), 546 $author_name, 547 ); 548 549 /** 550 * Allow changing the search_key for cached results 551 * 552 * @event core.search_native_by_keyword_modify_search_key 553 * @var array search_key_array Array with search parameters to generate the search_key 554 * @var array must_contain_ids Array with post ids of posts containing words that are to be included 555 * @var array must_not_contain_ids Array with post ids of posts containing words that should not be included 556 * @var array must_exclude_one_ids Array with post ids of posts containing at least one word that needs to be excluded 557 * @var string type Searching type ('posts', 'topics') 558 * @var string fields Searching fields ('titleonly', 'msgonly', 'firstpost', 'all') 559 * @var string terms Searching terms ('all', 'any') 560 * @var int sort_days Time, in days, of the oldest possible post to list 561 * @var string sort_key The sort type used from the possible sort types 562 * @var int topic_id Limit the search to this topic_id only 563 * @var array ex_fid_ary Which forums not to search on 564 * @var string 
post_visibility Post visibility data 565 * @var array author_ary Array of user_id containing the users to filter the results to 566 * @since 3.1.7-RC1 567 */ 568 $vars = array( 569 'search_key_array', 570 'must_contain_ids', 571 'must_not_contain_ids', 572 'must_exclude_one_ids', 573 'type', 574 'fields', 575 'terms', 576 'sort_days', 577 'sort_key', 578 'topic_id', 579 'ex_fid_ary', 580 'post_visibility', 581 'author_ary', 582 ); 583 extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_keyword_modify_search_key', compact($vars))); 584 585 $search_key = md5(implode('#', $search_key_array)); 586 587 // try reading the results from cache 588 $total_results = 0; 589 if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE) 590 { 591 return $total_results; 592 } 593 594 $id_ary = array(); 595 596 $sql_where = array(); 597 $group_by = false; 598 $m_num = 0; 599 $w_num = 0; 600 601 $sql_array = array( 602 'SELECT' => ($type == 'posts') ? 
'p.post_id' : 'p.topic_id', 603 'FROM' => array( 604 SEARCH_WORDMATCH_TABLE => array(), 605 SEARCH_WORDLIST_TABLE => array(), 606 ), 607 'LEFT_JOIN' => array(array( 608 'FROM' => array(POSTS_TABLE => 'p'), 609 'ON' => 'm0.post_id = p.post_id', 610 )), 611 ); 612 613 $title_match = ''; 614 $left_join_topics = false; 615 $group_by = true; 616 // Build some display specific sql strings 617 switch ($fields) 618 { 619 case 'titleonly': 620 $title_match = 'title_match = 1'; 621 $group_by = false; 622 // no break 623 case 'firstpost': 624 $left_join_topics = true; 625 $sql_where[] = 'p.post_id = t.topic_first_post_id'; 626 break; 627 628 case 'msgonly': 629 $title_match = 'title_match = 0'; 630 $group_by = false; 631 break; 632 } 633 634 if ($type == 'topics') 635 { 636 $left_join_topics = true; 637 $group_by = true; 638 } 639 640 /** 641 * @todo Add a query optimizer (handle stuff like "+(4|3) +4") 642 */ 643 644 foreach ($this->must_contain_ids as $subquery) 645 { 646 if (is_array($subquery)) 647 { 648 $group_by = true; 649 650 $word_id_sql = array(); 651 $word_ids = array(); 652 foreach ($subquery as $id) 653 { 654 if (is_string($id)) 655 { 656 $sql_array['LEFT_JOIN'][] = array( 657 'FROM' => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num), 658 'ON' => "w$w_num.word_text LIKE $id" 659 ); 660 $word_ids[] = "w$w_num.word_id"; 661 662 $w_num++; 663 } 664 else 665 { 666 $word_ids[] = $id; 667 } 668 } 669 670 $sql_where[] = $this->db->sql_in_set("m$m_num.word_id", $word_ids); 671 672 unset($word_id_sql); 673 unset($word_ids); 674 } 675 else if (is_string($subquery)) 676 { 677 $sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num; 678 679 $sql_where[] = "w$w_num.word_text LIKE $subquery"; 680 $sql_where[] = "m$m_num.word_id = w$w_num.word_id"; 681 682 $group_by = true; 683 $w_num++; 684 } 685 else 686 { 687 $sql_where[] = "m$m_num.word_id = $subquery"; 688 } 689 690 $sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . 
$m_num; 691 692 if ($title_match) 693 { 694 $sql_where[] = "m$m_num.$title_match"; 695 } 696 697 if ($m_num != 0) 698 { 699 $sql_where[] = "m$m_num.post_id = m0.post_id"; 700 } 701 $m_num++; 702 } 703 704 foreach ($this->must_not_contain_ids as $key => $subquery) 705 { 706 if (is_string($subquery)) 707 { 708 $sql_array['LEFT_JOIN'][] = array( 709 'FROM' => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num), 710 'ON' => "w$w_num.word_text LIKE $subquery" 711 ); 712 713 $this->must_not_contain_ids[$key] = "w$w_num.word_id"; 714 715 $group_by = true; 716 $w_num++; 717 } 718 } 719 720 if (sizeof($this->must_not_contain_ids)) 721 { 722 $sql_array['LEFT_JOIN'][] = array( 723 'FROM' => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num), 724 'ON' => $this->db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id" 725 ); 726 727 $sql_where[] = "m$m_num.word_id IS NULL"; 728 $m_num++; 729 } 730 731 foreach ($this->must_exclude_one_ids as $ids) 732 { 733 $is_null_joins = array(); 734 foreach ($ids as $id) 735 { 736 if (is_string($id)) 737 { 738 $sql_array['LEFT_JOIN'][] = array( 739 'FROM' => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num), 740 'ON' => "w$w_num.word_text LIKE $id" 741 ); 742 $id = "w$w_num.word_id"; 743 744 $group_by = true; 745 $w_num++; 746 } 747 748 $sql_array['LEFT_JOIN'][] = array( 749 'FROM' => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num), 750 'ON' => "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '') 751 ); 752 $is_null_joins[] = "m$m_num.word_id IS NULL"; 753 754 $m_num++; 755 } 756 $sql_where[] = '(' . implode(' OR ', $is_null_joins) . 
')'; 757 } 758 759 $sql_where[] = $post_visibility; 760 761 $search_query = $this->search_query; 762 $must_exclude_one_ids = $this->must_exclude_one_ids; 763 $must_not_contain_ids = $this->must_not_contain_ids; 764 $must_contain_ids = $this->must_contain_ids; 765 766 /** 767 * Allow changing the query used for counting for posts using fulltext_native 768 * 769 * @event core.search_native_keywords_count_query_before 770 * @var string search_query The parsed keywords used for this search 771 * @var array must_not_contain_ids Ids that cannot be taken into account for the results 772 * @var array must_exclude_one_ids Ids that cannot be on the results 773 * @var array must_contain_ids Ids that must be on the results 774 * @var int total_results The previous result count for the format of the query 775 * Set to 0 to force a re-count 776 * @var array sql_array The data on how to search in the DB at this point 777 * @var bool left_join_topics Whether or not TOPICS_TABLE should be CROSS JOIN'ED 778 * @var array author_ary Array of user_id containing the users to filter the results to 779 * @var string author_name An extra username to search on (!empty(author_ary) must be true, to be relevant) 780 * @var array ex_fid_ary Which forums not to search on 781 * @var int topic_id Limit the search to this topic_id only 782 * @var string sql_sort_table Extra tables to include in the SQL query. 783 * Used in conjunction with sql_sort_join 784 * @var string sql_sort_join SQL conditions to join all the tables used together. 
785 * Used in conjunction with sql_sort_table 786 * @var int sort_days Time, in days, of the oldest possible post to list 787 * @var string sql_where An array of the current WHERE clause conditions 788 * @var string sql_match Which columns to do the search on 789 * @var string sql_match_where Extra conditions to use to properly filter the matching process 790 * @var bool group_by Whether or not the SQL query requires a GROUP BY for the elements in the SELECT clause 791 * @var string sort_by_sql The possible predefined sort types 792 * @var string sort_key The sort type used from the possible sort types 793 * @var string sort_dir "a" for ASC or "d" dor DESC for the sort order used 794 * @var string sql_sort The result SQL when processing sort_by_sql + sort_key + sort_dir 795 * @var int start How many posts to skip in the search results (used for pagination) 796 * @since 3.1.5-RC1 797 */ 798 $vars = array( 799 'search_query', 800 'must_not_contain_ids', 801 'must_exclude_one_ids', 802 'must_contain_ids', 803 'total_results', 804 'sql_array', 805 'left_join_topics', 806 'author_ary', 807 'author_name', 808 'ex_fid_ary', 809 'topic_id', 810 'sql_sort_table', 811 'sql_sort_join', 812 'sort_days', 813 'sql_where', 814 'sql_match', 815 'sql_match_where', 816 'group_by', 817 'sort_by_sql', 818 'sort_key', 819 'sort_dir', 820 'sql_sort', 821 'start', 822 ); 823 extract($this->phpbb_dispatcher->trigger_event('core.search_native_keywords_count_query_before', compact($vars))); 824 825 if ($topic_id) 826 { 827 $sql_where[] = 'p.topic_id = ' . $topic_id; 828 } 829 830 if (sizeof($author_ary)) 831 { 832 if ($author_name) 833 { 834 // first one matches post of registered users, second one guests and deleted users 835 $sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . 
')'; 836 } 837 else 838 { 839 $sql_author = $this->db->sql_in_set('p.poster_id', $author_ary); 840 } 841 $sql_where[] = $sql_author; 842 } 843 844 if (sizeof($ex_fid_ary)) 845 { 846 $sql_where[] = $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true); 847 } 848 849 if ($sort_days) 850 { 851 $sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400)); 852 } 853 854 $sql_array['WHERE'] = implode(' AND ', $sql_where); 855 856 $is_mysql = false; 857 // if the total result count is not cached yet, retrieve it from the db 858 if (!$total_results) 859 { 860 $sql = ''; 861 $sql_array_count = $sql_array; 862 863 if ($left_join_topics) 864 { 865 $sql_array_count['LEFT_JOIN'][] = array( 866 'FROM' => array(TOPICS_TABLE => 't'), 867 'ON' => 'p.topic_id = t.topic_id' 868 ); 869 } 870 871 switch ($this->db->get_sql_layer()) 872 { 873 case 'mysql4': 874 case 'mysqli': 875 876 // 3.x does not support SQL_CALC_FOUND_ROWS 877 // $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT']; 878 $is_mysql = true; 879 880 break; 881 882 case 'sqlite': 883 case 'sqlite3': 884 $sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id'; 885 $sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results 886 FROM (' . $this->db->sql_build_query('SELECT', $sql_array_count) . ')'; 887 888 // no break 889 890 default: 891 $sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results'; 892 $sql = (!$sql) ? $this->db->sql_build_query('SELECT', $sql_array_count) : $sql; 893 894 $result = $this->db->sql_query($sql); 895 $total_results = (int) $this->db->sql_fetchfield('total_results'); 896 $this->db->sql_freeresult($result); 897 898 if (!$total_results) 899 { 900 return false; 901 } 902 break; 903 } 904 905 unset($sql_array_count, $sql); 906 } 907 908 // Build sql strings for sorting 909 $sql_sort = $sort_by_sql[$sort_key] . 
(($sort_dir == 'a') ? ' ASC' : ' DESC'); 910 911 switch ($sql_sort[0]) 912 { 913 case 'u': 914 $sql_array['FROM'][USERS_TABLE] = 'u'; 915 $sql_where[] = 'u.user_id = p.poster_id '; 916 break; 917 918 case 't': 919 $left_join_topics = true; 920 break; 921 922 case 'f': 923 $sql_array['FROM'][FORUMS_TABLE] = 'f'; 924 $sql_where[] = 'f.forum_id = p.forum_id'; 925 break; 926 } 927 928 if ($left_join_topics) 929 { 930 $sql_array['LEFT_JOIN'][] = array( 931 'FROM' => array(TOPICS_TABLE => 't'), 932 'ON' => 'p.topic_id = t.topic_id' 933 ); 934 } 935 936 // if using mysql and the total result count is not calculated yet, get it from the db 937 if (!$total_results && $is_mysql) 938 { 939 // Also count rows for the query as if there was not LIMIT. Add SQL_CALC_FOUND_ROWS to SQL 940 $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT']; 941 } 942 943 $sql_array['WHERE'] = implode(' AND ', $sql_where); 944 $sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : ''; 945 $sql_array['ORDER_BY'] = $sql_sort; 946 947 unset($sql_where, $sql_sort, $group_by); 948 949 $sql = $this->db->sql_build_query('SELECT', $sql_array); 950 $result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); 951 952 while ($row = $this->db->sql_fetchrow($result)) 953 { 954 $id_ary[] = (int) $row[(($type == 'posts') ? 
'post_id' : 'topic_id')]; 955 } 956 $this->db->sql_freeresult($result); 957 958 if (!$total_results && $is_mysql) 959 { 960 // Get the number of results as calculated by MySQL 961 $sql_count = 'SELECT FOUND_ROWS() as total_results'; 962 $result = $this->db->sql_query($sql_count); 963 $total_results = (int) $this->db->sql_fetchfield('total_results'); 964 $this->db->sql_freeresult($result); 965 966 if (!$total_results) 967 { 968 return false; 969 } 970 } 971 972 if ($start >= $total_results) 973 { 974 $start = floor(($total_results - 1) / $per_page) * $per_page; 975 976 $result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); 977 978 while ($row = $this->db->sql_fetchrow($result)) 979 { 980 $id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')]; 981 } 982 $this->db->sql_freeresult($result); 983 984 } 985 986 // store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page 987 $this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir); 988 $id_ary = array_slice($id_ary, 0, (int) $per_page); 989 990 return $total_results; 991 } 992 993 /** 994 * Performs a search on an author's posts without caring about message contents. 
Depends on display specific params
	*
	* The result set is cached under a key derived from all search options. On a
	* cache miss the matching ids are counted, one block of ids is read from the
	* database and stored in the cache, and the first page is handed back.
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		$sort_dir			is either a or d representing ASC and DESC
	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		$author_ary			an array of author ids
	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			&$start				indicates the first index of the page; passed by reference and moved back to the last page if it lies beyond the result count
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results; 0 if no author was given, false if nothing was found
	*/
	public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
	{
		// No author? No posts
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// generate a search_key from all the options to identify the results
		$search_key_array = array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			$post_visibility,
			implode(',', $author_ary),
			$author_name,
		);

		/**
		* Allow changing the search_key for cached results
		*
		* @event core.search_native_by_author_modify_search_key
		* @var	array	search_key_array	Array with search parameters to generate the search_key
		* @var	string	type				Searching type ('posts', 'topics')
		* @var	boolean	firstpost_only		Flag indicating if only topic starting posts are considered
		* @var	int		sort_days			Time, in days, of the oldest possible post to list
		* @var	string	sort_key			The sort type used from the possible sort types
		* @var	int		topic_id			Limit the search to this topic_id only
		* @var	array	ex_fid_ary			Which forums not to search on
		* @var	string	post_visibility		Post visibility data
		* @var	array	author_ary			Array of user_id containing the users to filter the results to
		* @var	string	author_name			The username to search on
		* @since 3.1.7-RC1
		*/
		$vars = array(
			'search_key_array',
			'type',
			'firstpost_only',
			'sort_days',
			'sort_key',
			'topic_id',
			'ex_fid_ary',
			'post_visibility',
			'author_ary',
			'author_name',
		);
		extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_author_modify_search_key', compact($vars)));

		$search_key = md5(implode('#', $search_key_array));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// Create some display specific sql strings
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_firstpost	= ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
		$post_visibility = ($post_visibility) ? ' AND ' . $post_visibility : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';

		// The first character of the sort expression is the table alias (u, t or f);
		// it decides which extra table has to be joined in for the ORDER BY
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
			break;

			case 't':
				// The topics table is already part of the query for topic searches and first-post-only searches
				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$is_mysql = false;

		/**
		* Allow changing the query used to search for posts by author in fulltext_native
		*
		* @event core.search_native_author_count_query_before
		* @var	int		total_results	The previous result count for the format of the query.
		*								Set to 0 to force a re-count
		* @var	string	type			The type of search being made
		* @var	string	select			SQL SELECT clause for what to get
		* @var	string	sql_sort_table	CROSS JOIN'ed table to allow doing the sort chosen
		* @var	string	sql_sort_join	Condition to define how to join the CROSS JOIN'ed table specified in sql_sort_table
		* @var	string	sql_author		SQL WHERE condition for the post author ids
		* @var	int		topic_id		Limit the search to this topic_id only
		* @var	array	sort_by_sql		The possible predefined sort types
		* @var	string	sort_key		The sort type used from the possible sort types
		* @var	string	sort_dir		"a" for ASC or "d" for DESC for the sort order used
		* @var	string	sql_sort		The result SQL when processing sort_by_sql + sort_key + sort_dir
		* @var	string	sort_days		Time, in days, that the oldest post showing can have
		* @var	string	sql_time		The SQL to search on the time specified by sort_days
		* @var	bool	firstpost_only	Whether or not to search only on the first post of the topics
		* @var	string	sql_firstpost	The SQL used in the WHERE clause to filter by firstpost.
		* @var	array	ex_fid_ary		Forum ids that must not be searched on
		* @var	string	sql_fora		SQL query for ex_fid_ary
		* @var	int		start			How many posts to skip in the search results (used for pagination)
		* @since 3.1.5-RC1
		*/
		$vars = array(
			'total_results',
			'type',
			'select',
			'sql_sort_table',
			'sql_sort_join',
			'sql_author',
			'topic_id',
			'sort_by_sql',
			'sort_key',
			'sort_dir',
			'sql_sort',
			'sort_days',
			'sql_time',
			'firstpost_only',
			'sql_firstpost',
			'ex_fid_ary',
			'sql_fora',
			'start',
		);
		extract($this->phpbb_dispatcher->trigger_event('core.search_native_author_count_query_before', compact($vars)));

		// If the cache was completely empty count the results
		if (!$total_results)
		{
			switch ($this->db->get_sql_layer())
			{
				case 'mysql4':
				case 'mysqli':
					// No separate COUNT query here: the total is fetched further down
					// with SQL_CALC_FOUND_ROWS / FOUND_ROWS() based on the id query
					$is_mysql = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$post_visibility
								$sql_fora
								$sql_time";
					}
					else
					{
						// The sqlite layers get the distinct topic ids from a sub-select instead of COUNT(DISTINCT ...)
						if ($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$post_visibility
								$sql_fora
								AND t.topic_id = p.topic_id
								$sql_time" . (($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3') ? ')' : '');
					}
					$result = $this->db->sql_query($sql);

					$total_results = (int) $this->db->sql_fetchfield('total_results');
					$this->db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$post_visibility
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$post_visibility
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $sql_sort;
			$field = 'topic_id';
		}

		// Only read one block of posts from the db and then cache it
		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$field];
		}
		$this->db->sql_freeresult($result);

		if (!$total_results && $is_mysql)
		{
			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
			$sql_calc = str_replace('SELECT ' . $select, 'SELECT SQL_CALC_FOUND_ROWS ' . $select, $sql);

			$result = $this->db->sql_query($sql_calc);
			$this->db->sql_freeresult($result);

			$sql_count = 'SELECT FOUND_ROWS() as total_results';
			$result = $this->db->sql_query($sql_count);
			$total_results = (int) $this->db->sql_fetchfield('total_results');
			$this->db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// The requested page lies behind the last result: fall back to the last page and read again
		if ($start >= $total_results)
		{
			$start = floor(($total_results - 1) / $per_page) * $per_page;

			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

			while ($row = $this->db->sql_fetchrow($result))
			{
				$id_ary[] = (int) $row[$field];
			}
			$this->db->sql_freeresult($result);
		}

		if (sizeof($id_ary))
		{
			// Cache the whole block, but only hand one page back to the caller
			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, $per_page);

			return $total_results;
		}
		return false;
	}

	/**
	* Split a text into words of a given length
	*
	* The text is converted to UTF-8, cleaned up, and split. Then, words that
	* conform to the defined length range are returned in an array.
	*
	* NOTE: duplicates are NOT removed from the return array
	*
	* @param	string	$text	Text to split, encoded in UTF-8
	* @return	array			Array of UTF-8 words
	*/
	public function split_message($text)
	{
		$match = $words = array();

		/**
		* Taken from the original code
		*/
		// Do not index code
		$match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
		// BBcode
		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';

		$min = $this->word_length['min'];
		$max = $this->word_length['max'];

		$isset_min = $min - 1;

		/**
		* Clean up the string, remove HTML tags, remove BBCodes
		*/
		$word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' ');

		while (strlen($word))
		{
			if (strlen($word) > 255 || strlen($word) <= $isset_min)
			{
				/**
				* Words longer than 255 bytes are ignored. This will have to be
				* changed whenever we change the length of search_wordlist.word_text
				*
				* Words shorter than $isset_min bytes are ignored, too
				*/
				$word = strtok(' ');
				continue;
			}

			$len = utf8_strlen($word);

			/**
			* Test whether the word is too short to be indexed.
			*
			* Note that this limit does NOT apply to CJK and Hangul
			*/
			if ($len < $min)
			{
				/**
				* Note: this could be optimized. If the codepoint is lower than Hangul's range
				* we know that it will also be lower than CJK ranges
				*/
				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
					&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
					&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
				{
					$word = strtok(' ');
					continue;
				}
			}

			$words[] = $word;
			$word = strtok(' ');
		}

		return $words;
	}

	/**
	* Updates wordlist and wordmatch tables when a message is posted or changed
	*
	* Does nothing if the config value fulltext_native_load_upd is not set.
	*
	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
	* @param	int		$post_id	The id of the post which is modified/created
	* @param	string	&$message	New or updated post content
	* @param	string	&$subject	New or updated post subject
	* @param	int		$poster_id	Post author's user id
	* @param	int		$forum_id	The id of the forum in which the post is located
	*/
	public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
	{
		if (!$this->config['fulltext_native_load_upd'])
		{
			/**
			* The search indexer is disabled, return
			*/
			return;
		}

		// Split old and new post/subject to obtain array of 'words'
		$split_text = $this->split_message($message);
		$split_title = $this->split_message($subject);

		$cur_words = array('post' => array(), 'title' => array());

		$words = array();
		if ($mode == 'edit')
		{
			$words['add']['post'] = array();
			$words['add']['title'] = array();
			$words['del']['post'] = array();
			$words['del']['title'] = array();

			// Cast the post id like the DELETE/INSERT statements below do
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . ' m
				WHERE m.post_id = ' . (int) $post_id . '
					AND w.word_id = m.word_id';
			$result = $this->db->sql_query($sql);

			while ($row = $this->db->sql_fetchrow($result))
			{
				$which = ($row['title_match']) ? 'title' : 'post';
				$cur_words[$which][$row['word_text']] = $row['word_id'];
			}
			$this->db->sql_freeresult($result);

			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
		}
		else
		{
			$words['add']['post'] = $split_text;
			$words['add']['title'] = $split_title;
			$words['del']['post'] = array();
			$words['del']['title'] = array();
		}
		unset($split_text);
		unset($split_title);

		// Get unique words from the above arrays
		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

		// We now have unique arrays of all words to be added and removed and
		// individual arrays of added and removed words for text and title. What
		// we need to do now is add the new words (if they don't already exist)
		// and then add (or remove) matches between the words and this post
		if (sizeof($unique_add_words))
		{
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $this->db->sql_in_set('word_text', $unique_add_words);
			$result = $this->db->sql_query($sql);

			$word_ids = array();
			while ($row = $this->db->sql_fetchrow($result))
			{
				$word_ids[$row['word_text']] = $row['word_id'];
			}
			$this->db->sql_freeresult($result);
			$new_words = array_diff($unique_add_words, array_keys($word_ids));

			$this->db->sql_transaction('begin');
			if (sizeof($new_words))
			{
				$sql_ary = array();

				foreach ($new_words as $word)
				{
					$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
				}
				// Errors of this insert are returned instead of triggering the error handler
				$this->db->sql_return_on_error(true);
				$this->db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
				$this->db->sql_return_on_error(false);
			}
			unset($new_words, $sql_ary);
		}
		else
		{
			$this->db->sql_transaction('begin');
		}

		// now update the search match table, remove links to removed words and add links to new words
		foreach ($words['del'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				$sql_in = array();
				foreach ($word_ary as $word)
				{
					$sql_in[] = $cur_words[$word_in][$word];
				}

				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
						AND post_id = ' . intval($post_id) . "
						AND title_match = $title_match";
				$this->db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
						AND word_count > 0';
				$this->db->sql_query($sql);

				unset($sql_in);
			}
		}

		$this->db->sql_return_on_error(true);
		foreach ($words['add'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
					SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
					FROM ' . SEARCH_WORDLIST_TABLE . '
					WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
				$this->db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count + 1
					WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
				$this->db->sql_query($sql);
			}
		}
		$this->db->sql_return_on_error(false);

		$this->db->sql_transaction('commit');

		// destroy cached search results containing any of the words removed or added
		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));

		unset($unique_add_words);
		unset($words);
		unset($cur_words);
	}

	/**
	* Removes entries from the wordmatch table for the specified post_ids
	*
	* The word counts of the affected words are decremented and cached search
	* results for the affected words and authors are destroyed.
	*
	* @param	array	$post_ids	Ids of the posts to remove from the index
	* @param	array	$author_ids	Ids of the authors whose cached results are destroyed
	* @param	array	$forum_ids	Not used by this method
	*/
	public function index_remove($post_ids, $author_ids, $forum_ids)
	{
		// Initialised up front: destroy_cache() below reads it even when no post ids were given
		$word_texts = array();

		if (sizeof($post_ids))
		{
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
				WHERE ' . $this->db->sql_in_set('m.post_id', $post_ids) . '
					AND w.word_id = m.word_id';
			$result = $this->db->sql_query($sql);

			$message_word_ids = $title_word_ids = array();
			while ($row = $this->db->sql_fetchrow($result))
			{
				if ($row['title_match'])
				{
					$title_word_ids[] = $row['word_id'];
				}
				else
				{
					$message_word_ids[] = $row['word_id'];
				}
				$word_texts[] = $row['word_text'];
			}
			$this->db->sql_freeresult($result);

			if (sizeof($title_word_ids))
			{
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $this->db->sql_in_set('word_id', $title_word_ids) . '
						AND word_count > 0';
				$this->db->sql_query($sql);
			}

			if (sizeof($message_word_ids))
			{
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $this->db->sql_in_set('word_id', $message_word_ids) . '
						AND word_count > 0';
				$this->db->sql_query($sql);
			}

			unset($title_word_ids);
			unset($message_word_ids);

			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $this->db->sql_in_set('post_id', $post_ids);
			$this->db->sql_query($sql);
		}

		$this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
	}

	/**
	* Tidy up indexes: Tag 'common words' and remove
	* words no longer referenced in the match table
	*/
	public function tidy()
	{
		// Is the fulltext indexer disabled? If yes then we need not
		// carry on ... it's okay ... I know when I'm not wanted boo hoo
		if (!$this->config['fulltext_native_load_upd'])
		{
			set_config('search_last_gc', time(), true);
			return;
		}

		$destroy_cache_words = array();

		// Remove common words
		if ($this->config['num_posts'] >= 100 && $this->config['fulltext_native_common_thres'])
		{
			$common_threshold = ((double) $this->config['fulltext_native_common_thres']) / 100.0;
			// First, get the IDs of common words
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE word_count > ' . floor($this->config['num_posts'] * $common_threshold) . '
					OR word_common = 1';
			$result = $this->db->sql_query($sql);

			$sql_in = array();
			while ($row = $this->db->sql_fetchrow($result))
			{
				$sql_in[] = $row['word_id'];
				$destroy_cache_words[] = $row['word_text'];
			}
			$this->db->sql_freeresult($result);

			if (sizeof($sql_in))
			{
				// Flag the words
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_common = 1
					WHERE ' . $this->db->sql_in_set('word_id', $sql_in);
				$this->db->sql_query($sql);

				// by setting search_last_gc to the new time here we make sure that if a user reloads because the
				// following query takes too long, he won't run into it again
				set_config('search_last_gc', time(), true);

				// Delete the matches
				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $this->db->sql_in_set('word_id', $sql_in);
				$this->db->sql_query($sql);
			}
			unset($sql_in);
		}

		if (sizeof($destroy_cache_words))
		{
			// destroy cached search results containing any of the words that are now common or were removed
			$this->destroy_cache(array_unique($destroy_cache_words));
		}

		set_config('search_last_gc', time(), true);
	}

	/**
	* Deletes all words from the index
	*
	* Empties the wordlist, wordmatch and search results tables. The sqlite
	* layers use DELETE, every other layer uses TRUNCATE TABLE.
	*
	* @param	mixed	$acp_module	Not used by this method
	* @param	mixed	$u_action	Not used by this method
	*/
	public function delete_index($acp_module, $u_action)
	{
		switch ($this->db->get_sql_layer())
		{
			case 'sqlite':
			case 'sqlite3':
				$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
				$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
				$this->db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
			break;

			default:
				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
			break;
		}
	}

	/**
	* Returns true if both the wordlist and the wordmatch table contain entries
	*
	* The decision is based on the (estimated) row counts collected by get_stats().
	*
	* @return	bool
	*/
	public function index_created()
	{
		if (!sizeof($this->stats))
		{
			$this->get_stats();
		}

		return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
	}

	/**
	* Returns an associative array containing information about the indexes
	*
	* @return	array	Language string => value pairs for total words and total matches
	*/
	public function index_stats()
	{
		if (!sizeof($this->stats))
		{
			$this->get_stats();
		}

		return array(
			$this->user->lang['TOTAL_WORDS']		=> $this->stats['total_words'],
			$this->user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']);
	}

	/**
	* Fills $this->stats with the estimated row counts of the wordlist and wordmatch tables
	*/
	protected function get_stats()
	{
		$this->stats['total_words']		= $this->db->get_estimated_row_count(SEARCH_WORDLIST_TABLE);
		$this->stats['total_matches']	= $this->db->get_estimated_row_count(SEARCH_WORDMATCH_TABLE);
	}

	/**
	* Clean up a text to remove non-alphanumeric characters
	*
	* This method receives a UTF-8 string, normalizes and validates it, replaces all
	* non-alphanumeric characters with strings then returns the result.
	*
	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
	*
	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
	* @param	string	$allowed_chars	String of special chars to allow
	* @param	string	$encoding		Text encoding
	* @return	string					Cleaned up text, only alphanumeric chars are left
	*
	* @todo \normalizer::cleanup being able to be used?
	*/
	protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
	{
		static $conv = array(), $conv_loaded = array();
		$allow = array();

		// Convert the text to UTF-8
		$encoding = strtolower($encoding);
		if ($encoding != 'utf-8')
		{
			$text = utf8_recode($text, $encoding);
		}

		// Sequence length in bytes, keyed by the high nibble of the lead byte.
		// Continuation bytes (0x80-0xBF) deliberately have no entry.
		$utf_len_mask = array(
			"\xC0"	=>	2,
			"\xD0"	=>	2,
			"\xE0"	=>	3,
			"\xF0"	=>	4
		);

		/**
		* Replace HTML entities and NCRs
		*/
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

		/**
		* Load the UTF-8 normalizer
		*
		* If we use it more widely, an instance of that class should be held in a
		* a global variable instead
		*/
		\utf_normalizer::nfc($text);

		/**
		* The first thing we do is:
		*
		* - convert ASCII-7 letters to lowercase
		* - remove the ASCII-7 non-alpha characters
		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
		*   0xC1 and 0xF5-0xFF
		*
		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
		*/
		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";

		// strtr() ignores the surplus characters of the longer argument, so the
		// replacement string must be exactly as long as $sb_match: the 26 lowercase
		// letters followed by one space for every remaining character
		$sb_replace	= 'istcpamelrdojbnhfgvwuqkyxz' . str_repeat(' ', strlen($sb_match) - 26);

		/**
		* This is the list of legal ASCII chars, it is automatically extended
		* with ASCII chars from $allowed_chars
		*/
		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';

		/**
		* Prepare an array containing the extra chars to allow
		*/
		if (isset($allowed_chars[0]))
		{
			$pos = 0;
			$len = strlen($allowed_chars);
			do
			{
				$c = $allowed_chars[$pos];

				if ($c < "\x80")
				{
					/**
					* ASCII char
					*/
					$sb_pos = strpos($sb_match, $c);
					if (is_int($sb_pos))
					{
						/**
						* Remove the char from $sb_match and its corresponding
						* replacement in $sb_replace
						*/
						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
						$legal_ascii .= $c;
					}

					++$pos;
				}
				else
				{
					/**
					* UTF-8 char
					*/
					$lead = $c & "\xF0";
					if (!isset($utf_len_mask[$lead]))
					{
						// Not a valid lead byte: skip it so that $pos always advances
						++$pos;
						continue;
					}

					$utf_len = $utf_len_mask[$lead];
					$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
					$pos += $utf_len;
				}
			}
			while ($pos < $len);
		}

		$text = strtr($text, $sb_match, $sb_replace);
		$ret = '';

		$pos = 0;
		$len = strlen($text);

		do
		{
			/**
			* Do all consecutive ASCII chars at once
			*/
			if ($spn = strspn($text, $legal_ascii, $pos))
			{
				$ret .= substr($text, $pos, $spn);
				$pos += $spn;
			}

			if ($pos >= $len)
			{
				return $ret;
			}

			/**
			* Capture the UTF char
			*/
			$lead = $text[$pos] & "\xF0";
			if (!isset($utf_len_mask[$lead]))
			{
				// Not a valid lead byte: treat it as a separator and move on,
				// otherwise $pos would never advance and the loop would not end
				$ret .= ' ';
				++$pos;
				continue;
			}

			$utf_len = $utf_len_mask[$lead];
			$utf_char = substr($text, $pos, $utf_len);
			$pos += $utf_len;

			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
				|| ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
				|| ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
			{
				/**
				* All characters within these ranges are valid
				*
				* We separate them with a space in order to index each character
				* individually
				*/
				$ret .= ' ' . $utf_char . ' ';
				continue;
			}

			if (isset($allow[$utf_char]))
			{
				/**
				* The char is explicitly allowed
				*/
				$ret .= $utf_char;
				continue;
			}

			if (isset($conv[$utf_char]))
			{
				/**
				* The char is mapped to something, maybe to itself actually
				*/
				$ret .= $conv[$utf_char];
				continue;
			}

			/**
			* The char isn't mapped, but did we load its conversion table?
			*
			* The search indexer table is split into blocks. The block number of
			* each char is equal to its codepoint right-shifted for 11 bits. It
			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
			* all UTF chars encoded in 2 bytes are in the same first block.
			*/
			if (isset($utf_char[2]))
			{
				if (isset($utf_char[3]))
				{
					/**
					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0011 1111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
				}
				else
				{
					/**
					* 1110 nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
				}
			}
			else
			{
				/**
				* 110x xxxx 10xx xxxx
				* 0000 0000 0000 0000
				*/
				$idx = 0;
			}

			/**
			* Check if the required conv table has been loaded already
			*/
			if (!isset($conv_loaded[$idx]))
			{
				$conv_loaded[$idx] = 1;
				$file = $this->phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $this->php_ext;

				if (file_exists($file))
				{
					$conv += include($file);
				}
			}

			if (isset($conv[$utf_char]))
			{
				$ret .= $conv[$utf_char];
			}
			else
			{
				/**
				* We add an entry to the conversion table so that we
				* don't have to convert to codepoint and perform the checks
				* that are above this block
				*/
				$conv[$utf_char] = ' ';
				$ret .= ' ';
			}
		}
		while (1);

		return $ret;
	}

	/**
	* Returns a list of options for the ACP to display
	*
	* @return	array	'tpl' holds the HTML for the settings form, 'config' maps
	*					each config key used by the form to its validation rule
	*/
	public function acp()
	{
		// Settings form: index updating on/off, min/max word length and common word threshold
		$tpl = '
		<dl>
			<dt><label for="fulltext_native_load_upd">' . $this->user->lang['YES_SEARCH_UPDATE'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
			<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($this->config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $this->user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$this->config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $this->user->lang['NO'] . '</label></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_min_chars">' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_min_chars" type="number" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_max_chars">' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_max_chars" type="number" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_common_thres">' . $this->user->lang['COMMON_WORD_THRESHOLD'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_common_thres" type="text" name="config[fulltext_native_common_thres]" value="' . (double) $this->config['fulltext_native_common_thres'] . '" /> %</dd>
		</dl>
		';

		// These are fields required in the config table
		return array(
			'tpl'		=> $tpl,
			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
		);
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Thu Jan 11 00:25:41 2018 | Cross-referenced by PHPXref 0.7.1 |