Server IP : 80.87.202.40 / Your IP : 216.73.216.169 Web Server : Apache System : Linux rospirotorg.ru 5.14.0-539.el9.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Dec 5 22:26:13 UTC 2024 x86_64 User : bitrix ( 600) PHP Version : 8.2.27 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : OFF | Sudo : ON | Pkexec : ON Directory : /home/bitrix/ext_www/rospirotorg.ru/bitrix/modules/search/tools/en/ |
Upload File : |
<?php
/**
 * English stemming support: suffix tables, stop-word filter and the stemmer itself.
 *
 * The stemmer works on UPPER-CASE words. Inside stemming_en() a lower-case 'y'
 * is used as a marker for a "consonant Y" (initial Y or Y after a vowel); it is
 * converted back to 'Y' before the stem is returned.
 */

// Step 2 suffix => replacement map (applied only when the suffix lies in R1).
global $STEMMING_EN_STEP2A;
$STEMMING_EN_STEP2A = [
    'TIONAL' => 'TION',
    'ENCI' => 'ENCE',
    'ANCI' => 'ANCE',
    'ABLI' => 'ABLE',
    'ENTLI' => 'ENT',
    'IZER' => 'IZE',
    'IZATION' => 'IZE',
    'ATIONAL' => 'ATE',
    'ATION' => 'ATE',
    'ATOR' => 'ATE',
    'ALISM' => 'AL',
    'ALITI' => 'AL',
    'ALLI' => 'AL',
    'FULNESS' => 'FUL',
    'OUSLI' => 'OUS',
    'OUSNESS' => 'OUS',
    'IVENESS' => 'IVE',
    'IVITI' => 'IVE',
    'BILITI' => 'BLE',
    'BLI' => 'BLE',
    'FULLI' => 'FUL',
    'LESSLI' => 'LESS'
];

// Regex matching any Step 2 suffix at the end of the word. OGI and LI are
// handled by dedicated branches, so they are not part of the replacement map.
global $STEMMING_EN_STEP2;
$STEMMING_EN_STEP2 = '/(' . implode('|', array_keys($STEMMING_EN_STEP2A)) . '|OGI|LI)$/';

// Step 3 suffix => replacement map (applied only when the suffix lies in R1).
global $STEMMING_EN_STEP3A;
$STEMMING_EN_STEP3A = [
    'TIONAL' => 'TION',
    'ATIONAL' => 'ATE',
    'ALIZE' => 'AL',
    'ICATE' => 'IC',
    'ICITI' => 'IC',
    'ICAL' => 'IC',
    'FUL' => '',
    'NESS' => ''
];

// Regex matching any Step 3 suffix; ATIVE has its own branch (needs R2).
global $STEMMING_EN_STEP3;
$STEMMING_EN_STEP3 = '/(' . implode('|', array_keys($STEMMING_EN_STEP3A)) . '|ATIVE)$/';

// Step 4 suffixes that are simply deleted when they lie in R2.
global $STEMMING_EN_STEP4A;
$STEMMING_EN_STEP4A = [
    'AL',
    'ANCE',
    'ENCE',
    'ER',
    'IC',
    'ABLE',
    'IBLE',
    'ANT',
    'EMENT',
    'MENT',
    'ENT',
    'ISM',
    'ATE',
    'ITI',
    'OUS',
    'IVE',
    'IZE'
];

// Regex matching any Step 4 suffix; ION has its own branch (needs S or T before it).
global $STEMMING_EN_STEP4;
$STEMMING_EN_STEP4 = '/(' . implode('|', $STEMMING_EN_STEP4A) . '|ION)$/';

// Whole-word exceptions: word => stem, returned before any other processing.
global $STEMMING_EN_EX1;
$STEMMING_EN_EX1 = [
    'SKIS' => 'SKI',
    'SKIES' => 'SKY',
    'DYING' => 'DIE',
    'LYING' => 'LIE',
    'TYING' => 'TIE',
    'IDLY' => 'IDL',
    'GENTLY' => 'GENTL',
    'UGLY' => 'UGLI',
    'EARLY' => 'EARLI',
    'ONLY' => 'ONLI',
    'SINGLY' => 'SINGL',
    'SKY' => 'SKY',
    'NEWS' => 'NEWS',
    'HOWE' => 'HOWE',
    'ATLAS' => 'ATLAS',
    'COSMOS' => 'COSMOS',
    'BIAS' => 'BIAS',
    'ANDES' => 'ANDES',
];

// Words that are returned unchanged if they are what remains after Step 1a.
global $STEMMING_EN_EX2;
$STEMMING_EN_EX2 = [
    'INNING' => 1,
    'OUTING' => 1,
    'CANNING' => 1,
    'HERRING' => 1,
    'EARRING' => 1,
    'PROCEED' => 1,
    'EXCEED' => 1,
    'SUCCEED' => 1,
];

/**
 * Returns the Latin letters (lower case followed by upper case) as one string.
 *
 * NOTE(review): presumably consumed by the tokenizer as the set of characters
 * that form English words; the caller is not visible here.
 *
 * @return string
 */
function stemming_letter_en()
{
    return 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM';
}

/**
 * Tells whether a word should be kept (i.e. it is NOT a stop word).
 *
 * Words shorter than two characters are always rejected. The built-in stop
 * list can be extended with the comma-separated STEMMING_STOP_EN constant;
 * the list is built once per request and cached in a static variable.
 *
 * @param string $sWord Word to check; compared as-is against the upper-case list.
 * @return bool False for too-short words and stop words, true otherwise.
 */
function stemming_stop_en($sWord)
{
    if (mb_strlen($sWord) < 2) {
        return false;
    }

    static $stop_list = false;
    if (!$stop_list) {
        $stop_list = [
            'QUOTE' => 0,
            'HTTP' => 0,
            'WWW' => 0,
            'RU' => 0,
            'IMG' => 0,
            'GIF' => 0,
            'A' => 0,
            'THE' => 0,
            'IS' => 0,
            'ARE' => 0,
            'OFF' => 0,
            'ON' => 0,
            'AND' => 0,
            'IN' => 0,
            'FOR' => 0,
            'OF' => 0,
            'BY' => 0,
            'WITH' => 0,
            'BE' => 0,
            'WAS' => 0,
            'IT' => 0,
        ];

        if (defined('STEMMING_STOP_EN')) {
            foreach (explode(',', STEMMING_STOP_EN) as $word) {
                $word = trim($word);
                if ($word !== '') {
                    $stop_list[$word] = 0;
                }
            }
        }
    }

    return !array_key_exists($sWord, $stop_list);
}

/**
 * Upper-cases text in a multibyte-safe way.
 *
 * @param string $sText
 * @return string
 */
function stemming_upper_en($sText)
{
    return mb_strtoupper($sText);
}

/**
 * Reduces an upper-case English word to its stem.
 *
 * The steps (1a, 1b, 1c, 2, 3, 4, 5) and the R1/R2 regions follow the comments
 * carried over from the original implementation. Words of two characters or
 * less are returned untouched; words listed in $STEMMING_EN_EX1 are mapped
 * directly; words listed in $STEMMING_EN_EX2 are returned right after Step 1a.
 *
 * @param string $word Upper-case word.
 * @return string Stem of the word (upper case).
 */
function stemming_en($word)
{
    global $STEMMING_EN_STEP2A;
    global $STEMMING_EN_STEP2;
    global $STEMMING_EN_STEP3A;
    global $STEMMING_EN_STEP3;
    global $STEMMING_EN_STEP4A;
    global $STEMMING_EN_STEP4;
    global $STEMMING_EN_EX1;
    global $STEMMING_EN_EX2;

    // If the word has two letters or less, leave it as it is.
    $word_len = mb_strlen($word);
    if ($word_len <= 2) {
        return $word;
    }

    if (array_key_exists($word, $STEMMING_EN_EX1)) {
        return $STEMMING_EN_EX1[$word];
    }

    // Set initial y, or y after a vowel, to Y, and then establish the regions R1 and R2.
    // Because the input is upper case, the "consonant Y" marker used here is a lower-case 'y'.
    $vowels = 'AEIOUY';
    $word = preg_replace('/^Y/', 'y', $word);
    $word = preg_replace('/([' . $vowels . '])(Y)/', "\\1y", $word);

    // In any word, R1 is the region after the first non-vowel following a vowel,
    // or the end of the word if it contains no such non-vowel.
    $R1 = 0;
    while (($R1 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R1, 1)) === false)) {
        $R1++;
    }
    while (($R1 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R1, 1)) !== false)) {
        $R1++;
    }
    if ($R1 < $word_len) {
        $R1++;
    }

    // Prefixes with a fixed R1 start.
    if (preg_match('/^COMMUN/', $word)) {
        $R1 = 6;
    }
    if (preg_match('/^GENER/', $word)) {
        $R1 = 5;
    }

    // R2 is computed the same way as R1, but starting from R1.
    $R2 = $R1;
    while (($R2 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R2, 1)) === false)) {
        $R2++;
    }
    while (($R2 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R2, 1)) !== false)) {
        $R2++;
    }
    if ($R2 < $word_len) {
        $R2++;
    }

    // Step 1a:
    // Search for the longest among the following suffixes, and perform the action indicated.
    // (The end-anchored regex finds the leftmost start, i.e. the longest suffix.)
    $found = [];
    if (preg_match('/(SSES|IED|IES|US|SS|S)$/', $word, $found)) {
        switch ($found[0]) {
            // sses - replace by ss
            case 'SSES':
                $word = mb_substr($word, 0, $word_len - 4) . 'SS';
                break;
            // ied+ ies* - replace by i if preceded by more than one letter,
            // otherwise by ie (so ties -> tie, cries -> cri)
            case 'IED':
            case 'IES':
                if (mb_strlen($word) > 4) {
                    $word = mb_substr($word, 0, $word_len - 3) . 'I';
                } else {
                    $word = mb_substr($word, 0, $word_len - 3) . 'IE';
                }
                break;
            // s - delete if the preceding word part contains a vowel not immediately before the s
            // (so gas and this retain the s, gaps and kiwis lose it)
            case 'S':
                if (preg_match('/([' . $vowels . '].*.)(S)$/', $word)) {
                    $word = mb_substr($word, 0, $word_len - 1);
                }
                break;
            // us+ ss - do nothing
        }
    }

    if (array_key_exists($word, $STEMMING_EN_EX2)) {
        return $word;
    }

    // Step 1b:
    // Search for the longest among the following suffixes, and perform the action indicated.
    if (preg_match('/(EEDLY|INGLY|EDLY|EED|ING|ED)$/', $word, $found)) {
        switch ($found[0]) {
            // eed eedly+ - replace by ee if in R1
            case 'EEDLY':
            case 'EED':
                if (preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))) {
                    $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . 'EE';
                }
                break;
            default:
                // delete if the preceding word part contains a vowel, and then
                if (($step1b = preg_replace('/([' . $vowels . '].*)(ED|EDLY|ING|INGLY)$/', "\\1", $word)) !== $word) {
                    // if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
                    if (($step1b1 = preg_replace('/(AT|BL|IZ)$/', "\\1E", $step1b)) === $step1b) {
                        // if the word ends with a double remove the last letter (so hopp -> hop), or
                        if (preg_match('/(BB|DD|FF|GG|MM|NN|PP|RR|TT)$/', $step1b)) {
                            $step1b1 = mb_substr($step1b, 0, mb_strlen($step1b) - 1);
                        } else {
                            // if the word is short, add e (so hop -> hope).
                            // A word is called short if it consists of a short syllable preceded by
                            // zero or more consonants. A short syllable is either
                            // (a) a vowel followed by a non-vowel other than w, x or Y and preceded
                            //     by a non-vowel, or
                            // (b) a vowel at the beginning of the word followed by a non-vowel.
                            if (
                                preg_match('/^[^' . $vowels . ']+[' . $vowels . '][^WXy' . $vowels . ']$/', $step1b)
                                || preg_match('/^[' . $vowels . '][^' . $vowels . ']$/', $step1b)
                            ) {
                                $step1b1 = $step1b . 'E';
                            }
                        }
                    }
                    $step1b = $step1b1;
                }
                $word = $step1b;
        }
    }

    // Step 1c:
    // replace suffix y or Y by i if preceded by a non-vowel which is not the first letter
    // of the word (so cry -> cri, by -> by, say -> say)
    $word = preg_replace('/^(.+[^' . $vowels . '])([yY])$/', "\\1I", $word);

    // Step 2:
    // Search for the longest among the following suffixes, and, if found and in R1,
    // perform the action indicated.
    if (
        preg_match($STEMMING_EN_STEP2, $word, $found)
        && preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))
    ) {
        switch ($found[0]) {
            // ogi - replace by og if preceded by l
            case 'OGI':
                if (preg_match('/LOGI$/', $word)) {
                    $word = mb_substr($word, 0, mb_strlen($word) - 3) . 'OG';
                }
                break;
            // li - delete if preceded by one of c d e g h k m n r t
            case 'LI':
                if (preg_match('/[CDEGHKMNRT]LI$/', $word)) {
                    $word = mb_substr($word, 0, mb_strlen($word) - 2);
                }
                break;
            default:
                $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . $STEMMING_EN_STEP2A[$found[0]];
        }
    }

    // Step 3:
    // Search for the longest among the following suffixes, and, if found and in R1,
    // perform the action indicated.
    if (
        preg_match($STEMMING_EN_STEP3, $word, $found)
        && preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))
    ) {
        switch ($found[0]) {
            // ative - delete only if it also lies in R2
            case 'ATIVE':
                if (preg_match('/ATIVE$/', mb_substr($word, $R2))) {
                    $word = mb_substr($word, 0, mb_strlen($word) - 5);
                }
                break;
            default:
                $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . $STEMMING_EN_STEP3A[$found[0]];
        }
    }

    // Step 4:
    // Search for the longest among the following suffixes, and, if found and in R2,
    // perform the action indicated.
    if (
        preg_match($STEMMING_EN_STEP4, $word, $found)
        && preg_match('/' . $found[0] . '$/', mb_substr($word, $R2))
    ) {
        switch ($found[0]) {
            // ion - delete if preceded by s or t
            case 'ION':
                if (preg_match('/[ST]ION$/', $word)) {
                    $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0]));
                }
                break;
            default:
                $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0]));
        }
    }

    // Step 5:
    // e - delete if in R2, or in R1 and not preceded by a short syllable.
    // l - delete if in R2 and preceded by l.
    // A short syllable is either (a) a vowel followed by a non-vowel other than w, x or Y
    // and preceded by a non-vowel, or (b) a vowel at the beginning of the word followed
    // by a non-vowel. The trailing "." in the patterns below stands for the final E.
    if (
        preg_match('/E$/', mb_substr($word, $R2))
        || (
            preg_match('/E$/', mb_substr($word, $R1))
            && !(
                preg_match('/[^' . $vowels . '][' . $vowels . '][^WXy' . $vowels . '].$/', $word)
                || preg_match('/^[' . $vowels . '][^' . $vowels . '].$/', $word)
            )
        )
    ) {
        $word = mb_substr($word, 0, mb_strlen($word) - 1);
    } elseif (preg_match('/L$/', mb_substr($word, $R2)) && preg_match('/LL$/', $word)) {
        $word = mb_substr($word, 0, mb_strlen($word) - 1);
    }

    // Turn the consonant-Y markers back into regular upper-case letters.
    return str_replace('y', 'Y', $word);
}