403WebShell

403Webshell
Server IP : 80.87.202.40 / Your IP : 216.73.216.169
Web Server : Apache
System : Linux rospirotorg.ru 5.14.0-539.el9.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Dec 5 22:26:13 UTC 2024 x86_64
User : bitrix ( 600)
PHP Version : 8.2.27
Disable Function : NONE
MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : OFF | Sudo : ON | Pkexec : ON
Directory : /home/bitrix/ext_www/rospirotorg.ru/bitrix/modules/search/tools/en/
Upload File :
[ Back ]
Current File : /home/bitrix/ext_www/rospirotorg.ru/bitrix/modules/search/tools/en/stemming.php
<?php
// Lookup tables and precompiled suffix patterns used by stemming_en().
// They are declared global so that they land in the global scope even when
// this file is included from inside a function.
global $STEMMING_EN_STEP2A, $STEMMING_EN_STEP2;
global $STEMMING_EN_STEP3A, $STEMMING_EN_STEP3;
global $STEMMING_EN_STEP4A, $STEMMING_EN_STEP4;
global $STEMMING_EN_EX1, $STEMMING_EN_EX2;

// Step 2: suffix => replacement.
$STEMMING_EN_STEP2A = [
	'TIONAL' => 'TION',
	'ENCI' => 'ENCE',
	'ANCI' => 'ANCE',
	'ABLI' => 'ABLE',
	'ENTLI' => 'ENT',
	'IZER' => 'IZE',
	'IZATION' => 'IZE',
	'ATIONAL' => 'ATE',
	'ATION' => 'ATE',
	'ATOR' => 'ATE',
	'ALISM' => 'AL',
	'ALITI' => 'AL',
	'ALLI' => 'AL',
	'FULNESS' => 'FUL',
	'OUSLI' => 'OUS',
	'OUSNESS' => 'OUS',
	'IVENESS' => 'IVE',
	'IVITI' => 'IVE',
	'BILITI' => 'BLE',
	'BLI' => 'BLE',
	'FULLI' => 'FUL',
	'LESSLI' => 'LESS',
];
// Step 2 pattern: every table suffix plus the specially handled OGI and LI.
$STEMMING_EN_STEP2 = sprintf('/(%s|OGI|LI)$/', implode('|', array_keys($STEMMING_EN_STEP2A)));

// Step 3: suffix => replacement (an empty string means the suffix is dropped).
$STEMMING_EN_STEP3A = [
	'TIONAL' => 'TION',
	'ATIONAL' => 'ATE',
	'ALIZE' => 'AL',
	'ICATE' => 'IC',
	'ICITI' => 'IC',
	'ICAL' => 'IC',
	'FUL' => '',
	'NESS' => '',
];
// Step 3 pattern: every table suffix plus the specially handled ATIVE.
$STEMMING_EN_STEP3 = sprintf('/(%s|ATIVE)$/', implode('|', array_keys($STEMMING_EN_STEP3A)));

// Step 4: suffixes that are simply removed.
$STEMMING_EN_STEP4A = [
	'AL',
	'ANCE',
	'ENCE',
	'ER',
	'IC',
	'ABLE',
	'IBLE',
	'ANT',
	'EMENT',
	'MENT',
	'ENT',
	'ISM',
	'ATE',
	'ITI',
	'OUS',
	'IVE',
	'IZE',
];
// Step 4 pattern: every listed suffix plus the specially handled ION.
$STEMMING_EN_STEP4 = sprintf('/(%s|ION)$/', implode('|', $STEMMING_EN_STEP4A));

// Exceptional forms: whole word => stem returned as-is by stemming_en().
$STEMMING_EN_EX1 = [
	'SKIS' => 'SKI', 'SKIES' => 'SKY',
	'DYING' => 'DIE', 'LYING' => 'LIE', 'TYING' => 'TIE',
	'IDLY' => 'IDL', 'GENTLY' => 'GENTL', 'UGLY' => 'UGLI',
	'EARLY' => 'EARLI', 'ONLY' => 'ONLI', 'SINGLY' => 'SINGL',
	'SKY' => 'SKY', 'NEWS' => 'NEWS', 'HOWE' => 'HOWE',
	'ATLAS' => 'ATLAS', 'COSMOS' => 'COSMOS', 'BIAS' => 'BIAS', 'ANDES' => 'ANDES',
];

// Words that stemming_en() returns unchanged once step 1a has run.
$STEMMING_EN_EX2 = array_fill_keys(
	['INNING', 'OUTING', 'CANNING', 'HERRING', 'EARRING', 'PROCEED', 'EXCEED', 'SUCCEED'],
	1
);

/**
 * Returns the Latin letters, lowercase followed by uppercase, as one string.
 *
 * @return string 52 characters in QWERTY keyboard order.
 */
function stemming_letter_en()
{
	$lowerCase = 'qwertyuiopasdfghjklzxcvbnm';
	$upperCase = 'QWERTYUIOPASDFGHJKLZXCVBNM';

	return $lowerCase . $upperCase;
}

/**
 * Tells whether a word should be kept (true) or discarded as a stop word (false).
 *
 * Words shorter than two characters are always discarded. The built-in list can be
 * extended through a comma-separated STEMMING_STOP_EN constant. The list is built
 * on the first call and cached in a static variable.
 *
 * @param string $sWord Word to check; it is compared verbatim against the list keys.
 * @return bool False for stop words and too-short words, true otherwise.
 */
function stemming_stop_en($sWord)
{
	static $stop_list = false;

	if (mb_strlen($sWord) < 2)
	{
		return false;
	}

	if (!$stop_list)
	{
		$stop_list = array_fill_keys(
			[
				'QUOTE', 'HTTP', 'WWW', 'RU', 'IMG', 'GIF', 'A', 'THE', 'IS',
				'ARE', 'OFF', 'ON', 'AND', 'IN', 'FOR', 'OF', 'BY', 'WITH',
				'BE', 'WAS', 'IT',
			],
			0
		);

		if (defined('STEMMING_STOP_EN'))
		{
			// Extra stop words supplied by configuration; blank entries are skipped.
			foreach (array_map('trim', explode(',', STEMMING_STOP_EN)) as $extra)
			{
				if ($extra !== '')
				{
					$stop_list[$extra] = 0;
				}
			}
		}
	}

	$isStopWord = array_key_exists($sWord, $stop_list);

	return !$isStopWord;
}

/**
 * Converts text to uppercase with the multibyte-aware mb_strtoupper().
 *
 * @param string $sText Text to convert.
 * @return string Uppercased text.
 */
function stemming_upper_en($sText)
{
	$upperCased = mb_strtoupper($sText);

	return $upperCased;
}

/**
 * Reduces an English word to its stem by stripping suffixes in a fixed sequence of steps
 * (1a, 1b, 1c, 2, 3, 4, 5). The step layout appears to follow the Snowball "English"
 * (Porter2) stemmer.
 *
 * Every suffix pattern and exception key is uppercase, so the word is presumably expected
 * to be uppercased already (e.g. by stemming_upper_en()) -- TODO confirm against callers.
 * Internally a lowercase 'y' marks a Y that must not count as a vowel; it is converted
 * back to 'Y' just before returning.
 *
 * @param string $word Word to stem.
 * @return string The stem. Words of two characters or fewer are returned unchanged, and
 *                words found in $STEMMING_EN_EX1 are mapped directly to their listed stem.
 */
function stemming_en($word)
{
	global $STEMMING_EN_STEP2A;
	global $STEMMING_EN_STEP2;
	global $STEMMING_EN_STEP3A;
	global $STEMMING_EN_STEP3;
	//NOTE: $STEMMING_EN_STEP4A is imported but not referenced below; only the prebuilt $STEMMING_EN_STEP4 pattern is used.
	global $STEMMING_EN_STEP4A;
	global $STEMMING_EN_STEP4;
	global $STEMMING_EN_EX1;
	global $STEMMING_EN_EX2;

	//If the word has two letters or fewer, leave it as it is.
	$word_len = mb_strlen($word);
	if ($word_len <= 2)
	{
		return $word;
	}
	//Exceptional forms map straight to a fixed stem and skip every step below.
	if (array_key_exists($word, $STEMMING_EN_EX1))
	{
		return $STEMMING_EN_EX1[$word];
	}

	//Mark an initial Y, or a Y after a vowel, as lowercase 'y' so that the steps below treat it as a non-vowel
	//($vowels contains uppercase letters only), then establish the regions R1 and R2.
	$vowels = 'AEIOUY';
	$word = preg_replace('/^Y/', 'y', $word);
	$word = preg_replace('/([' . $vowels . '])(Y)/', "\\1y", $word);

	//In any word, R1 is the region after the first non-vowel following a vowel, or the end of the word if it contains no such non-vowel.
	//$R1 is a character offset: skip leading non-vowels, then the vowel run, then one non-vowel.
	$R1 = 0;
	while (($R1 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R1, 1)) === false))
	{
		$R1++;
	}
	while (($R1 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R1, 1)) !== false))
	{
		$R1++;
	}
	if ($R1 < $word_len)
	{
		$R1++;
	}
	//Prefix overrides: words starting with COMMUN or GENER get a fixed R1 offset.
	if (preg_match('/^COMMUN/', $word))
	{
		$R1 = 6;
	}
	if (preg_match('/^GENER/', $word))
	{
		$R1 = 5;
	}

	//R2 is found by applying the same scan again, starting from R1.
	//Both offsets are computed once here and are not recalculated as suffixes are stripped.
	$R2 = $R1;
	while (($R2 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R2, 1)) === false))
	{
		$R2++;
	}
	while (($R2 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R2, 1)) !== false))
	{
		$R2++;
	}
	if ($R2 < $word_len)
	{
		$R2++;
	}

	//Step 1a:
	//	Search for the longest among the following suffixes, and perform the action indicated.
	//	($word_len is still the current length here: nothing above changed the number of characters.)
	$found = [];
	if (preg_match('/(SSES|IED|IES|US|SS|S)$/', $word, $found))
	{
		switch ($found[0])
		{
			//sses - replace by ss
		case 'SSES':
			$word = mb_substr($word, 0, $word_len - 4) . 'SS';
			break;
			//ied+   ies* - replace by i if preceded by more than one letter, otherwise by ie  (so ties -> tie, cries -> cri)
		case 'IED':
		case 'IES':
			if (mb_strlen($word) > 4)
			{
				$word = mb_substr($word, 0, $word_len - 3) . 'I';
			}
			else
			{
				$word = mb_substr($word, 0, $word_len - 3) . 'IE';
			}
			break;
			//s  delete if the preceding word part contains a vowel not immediately before the s
			//   (so gas and this retain the s, gaps and kiwis lose it)
		case 'S':
			if (preg_match('/([' . $vowels . '].*.)(S)$/', $word))
			{
				$word = mb_substr($word, 0, $word_len - 1);
			}
			break;
			//us+   ss - do nothing
		}
	}

	//Words in the second exception list are returned as they stand after step 1a.
	if (array_key_exists($word, $STEMMING_EN_EX2))
	{
		return $word;
	}

	//Step 1b:
	//	Search for the longest among the following suffixes, and perform the action indicated.
	//eed   eedly+ - replace by ee if in R1
	if (preg_match('/(EEDLY|INGLY|EDLY|EED|ING|ED)$/', $word, $found))
	{
		switch ($found[0])
		{
		case 'EEDLY':
		case 'EED':
			//"In R1" is tested by matching the suffix against the part of the word starting at offset $R1.
			if (preg_match('/' . $found[0] . '$/', mb_substr($word, $R1)))
			{
				$word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . 'EE';
			}
			break;
		default:
			//ed   edly+   ing   ingly+ - delete if the preceding word part contains a vowel, and then
			//(preg_replace leaves the word unchanged when no vowel precedes the suffix, which the != test detects)
			if (($step1b = preg_replace('/([' . $vowels . '].*)(ED|EDLY|ING|INGLY)$/', "\\1", $word)) != $word)
			{
				//if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
				if (($step1b1 = preg_replace('/(AT|BL|IZ)$/', "\\1E", $step1b)) == $step1b)
				{
					//if the word ends with a double remove the last letter (so hopp -> hop), or
					if (preg_match('/(BB|DD|FF|GG|MM|NN|PP|RR|TT)$/', $step1b))
					{
						$step1b1 = mb_substr($step1b, 0, mb_strlen($step1b) - 1);
					}
					else
					{
						//if the word is short, add e (so hop -> hope)
						//A word is called short if it consists of a short syllable preceded by zero or more consonants.
						//Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y
						//and preceded by a non-vowel, or * (b) a vowel at the beginning of the word followed by a non-vowel.
						if (preg_match('/^[^' . $vowels . ']+[' . $vowels . '][^WXy' . $vowels . ']$/', $step1b)
							|| preg_match('/^[' . $vowels . '][^' . $vowels . ']$/', $step1b)
						)
						{
							$step1b1 = $step1b . 'E';
						}
					}
				}
				$step1b = $step1b1;
			}
			$word = $step1b;
		}
	}

	//Step 1c: *
	//	replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
	$word = preg_replace('/^(.+[^' . $vowels . '])([yY])$/', "\\1I", $word);

	//Step 2:
	//	Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated.
	//	The second preg_match checks that the matched suffix lies within the part of the word starting at offset $R1.
	if (
		preg_match($STEMMING_EN_STEP2, $word, $found)
		&& preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))
	)
	{
		switch ($found[0])
		{
		case 'OGI':
			//ogi - replace by og if preceded by l
			if (preg_match('/LOGI$/', $word))
			{
				$word = mb_substr($word, 0, mb_strlen($word) - 3) . 'OG';
			}
			break;
		case 'LI':
			//li - delete if preceded by one of c d e g h k m n r t
			if (preg_match('/[CDEGHKMNRT]LI$/', $word))
			{
				$word = mb_substr($word, 0, mb_strlen($word) - 2);
			}
			break;
		default:
			//every other suffix is swapped for its replacement from the step 2 table
			$word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . $STEMMING_EN_STEP2A[$found[0]];
		}
	}

	//Step 3:
	//	Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated.
	if (
		preg_match($STEMMING_EN_STEP3, $word, $found)
		&& preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))
	)
	{
		switch ($found[0])
		{
		case 'ATIVE':
			//ative - delete only if it also lies in R2
			if (preg_match('/ATIVE$/', mb_substr($word, $R2)))
			{
				$word = mb_substr($word, 0, mb_strlen($word) - 5);
			}
			break;
		default:
			//every other suffix is swapped for its replacement from the step 3 table (possibly an empty string)
			$word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . $STEMMING_EN_STEP3A[$found[0]];
		}
	}

	//Step 4:
	//	Search for the longest among the following suffixes, and, if found and in R2, perform the action indicated.
	if (
		preg_match($STEMMING_EN_STEP4, $word, $found)
		&& preg_match('/' . $found[0] . '$/', mb_substr($word, $R2))
	)
	{
		switch ($found[0])
		{
		case 'ION':
			//ion - delete if preceded by s or t
			if (preg_match('/[ST]ION$/', $word))
			{
				$word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0]));
			}
			break;
		default:
			//every other suffix is simply deleted
			$word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0]));
		}
	}

	//Step 5:
	//	e - delete if in R2, or in R1 and not preceded by a short syllable
	//	l - delete if in R2 and preceded by l
	if (
		preg_match('/E$/', mb_substr($word, $R2))
		|| (
			preg_match('/E$/', mb_substr($word, $R1))
			//Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y
			//and preceded by a non-vowel, or * (b) a vowel at the beginning of the word followed by a non-vowel.
			//The trailing "." in both patterns stands for the final E being examined.
			&& !(
				preg_match('/[^' . $vowels . '][' . $vowels . '][^WXy' . $vowels . '].$/', $word)
				|| preg_match('/^[' . $vowels . '][^' . $vowels . '].$/', $word)
			)
		)
	)
	{
		$word = mb_substr($word, 0, mb_strlen($word) - 1);
	}
	elseif (preg_match('/L$/', mb_substr($word, $R2)) && preg_match('/LL$/', $word))
	{
		$word = mb_substr($word, 0, mb_strlen($word) - 1);
	}

	//Turn any remaining lowercase 'y' markers back into 'Y'.
	return str_replace('y', 'Y', $word);
}
Youez - 2016 - github.com/yon3zu
LinuXploit