403Webshell
Server IP : 80.87.202.40  /  Your IP : 216.73.216.169
Web Server : Apache
System : Linux rospirotorg.ru 5.14.0-539.el9.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Dec 5 22:26:13 UTC 2024 x86_64
User : bitrix ( 600)
PHP Version : 8.2.27
Disable Function : NONE
MySQL : OFF |  cURL : ON |  WGET : ON |  Perl : ON |  Python : OFF |  Sudo : ON |  Pkexec : ON
Directory :  /home/bitrix/ext_www/rospirotorg.ru/bitrix/modules/search/tools/

Upload File :
current_dir [ Writeable] document_root [ Writeable]

 

Command :


[ Back ]     

Current File : /home/bitrix/ext_www/rospirotorg.ru/bitrix/modules/search/tools/stemming.php
<?php

/**
 * Resolves and caches the stemming callbacks and letter sets of a language.
 *
 * The very first call also registers every language returned by
 * CLanguage::GetList(). Each registry entry has the keys 'stem', 'stop',
 * 'upper' (callable names), 'letters', 'abc' (raw character lists) and
 * 'pcre_letters', 'pcre_abc' (the same lists escaped for use inside a PCRE
 * character class and prefixed with \w\d).
 *
 * @param string|false $sLang Language id, or false to get the whole registry.
 * @return array Entry of the requested language, or the registry keyed by
 *               language id when $sLang is false.
 */
function stemming_init($sLang='ru')
{
	static $arStemFunc = false;

	//First call: register every known language
	if ($arStemFunc === false)
	{
		//Must become an array before recursing, otherwise recursion never ends
		$arStemFunc = [];
		$rsLanguages = CLanguage::GetList();
		while ($arLanguage = $rsLanguages->Fetch())
		{
			stemming_init($arLanguage['LID']);
		}
	}

	//Whole registry requested
	if ($sLang === false)
	{
		return $arStemFunc;
	}

	//Language is already registered
	if (isset($arStemFunc[$sLang]))
	{
		return $arStemFunc[$sLang];
	}

	//Local names are kept as is down to the last include:
	//included files are executed in the scope of this function.
	$stemming_function_suf = $sLang;

	//1. Project level override
	if (!function_exists('stemming_' . $sLang))
	{
		$strFileName = $_SERVER['DOCUMENT_ROOT'] . BX_PERSONAL_ROOT . '/php_interface/' . $sLang . '/search/stemming.php';
		if (file_exists($strFileName))
		{
			@include $strFileName;
		}
	}

	//2. Implementation shipped with the module
	if (!function_exists('stemming_' . $sLang))
	{
		$strFileName = $_SERVER['DOCUMENT_ROOT'] . '/bitrix/modules/search/tools/' . $sLang . '/stemming.php';
		if (file_exists($strFileName))
		{
			if (\Bitrix\Main\Localization\Translation::allowConvertEncoding())
			{
				\Bitrix\Main\Localization\StreamConverter::include($strFileName, $sLang);
			}
			else
			{
				@include $strFileName;
			}
		}
	}

	//3. Nothing was found, so fall back to stemming_default
	if (!function_exists('stemming_' . $sLang))
	{
		$stemming_function_suf = 'default';
	}

	//Language specific helpers with fallback to the default ones
	$stopFunction = 'stemming_stop_' . $sLang;
	$stopFunction = function_exists($stopFunction) ? $stopFunction : 'stemming_stop_default';

	$upperFunction = 'stemming_upper_' . $sLang;
	$upperFunction = function_exists($upperFunction) ? $upperFunction : 'stemming_upper_default';

	$letterFunction = 'stemming_letter_' . $sLang;
	$hasLetterFunction = function_exists($letterFunction);

	//'letters' = default letters + language letters + letters from the module option
	$letters = stemming_letter_default();
	if ($hasLetterFunction)
	{
		$letters .= $letterFunction();
	}
	// Do not use CPageOption feature in the real project. This is for unit tests only.
	$letters .= CPageOption::GetOptionString('search', 'letters') ?: COption::GetOptionString('search', 'letters');

	//'abc' = language letters only, or default letters when there are none
	$abc = $hasLetterFunction ? $letterFunction() : '';
	if ($abc == '')
	{
		$abc = stemming_letter_default();
	}

	//Makes a character list safe to embed into /[...]/ pattern
	$toCharClass = static function ($chars)
	{
		return '\\w\\d' . str_replace(
			['\\'  , '-'  , '^'  , ']'  , '/'],
			['\\\\', '\\-', '\\^', '\\]', '\\/'],
			$chars
		);
	};

	$arStemFunc[$sLang] = [
		'stem' => 'stemming_' . $stemming_function_suf,
		'stop' => $stopFunction,
		'upper' => $upperFunction,
		'letters' => $letters,
		'pcre_letters' => $toCharClass($letters),
		'abc' => $abc,
		'pcre_abc' => $toCharClass($abc),
	];

	return $arStemFunc[$sLang];
}

/**
 * Converts text to upper case with the 'upper' callback registered
 * for the language by stemming_init().
 *
 * @param string $sText Text to convert.
 * @param string $sLang Language id.
 * @return mixed Whatever the language 'upper' callback returns.
 */
function stemming_upper($sText, $sLang='ru')
{
	$langInfo = stemming_init($sLang);

	return $langInfo['upper']($sText);
}

/**
 * Splits text into distinct upper-cased words.
 *
 * Every character outside of the language 'pcre_letters' class is replaced
 * with a single space, then the text is tokenized by spaces. Words are cut
 * to 100 characters.
 *
 * The stored value is mb_strpos() of the (cut) word in the processed text,
 * i.e. the first place where it occurs as a substring. This may be inside
 * of an earlier longer word.
 *
 * @param string $sText Text to split.
 * @param string $sLang Language id.
 * @return array Map: word => character position.
 */
function stemming_split($sText, $sLang='ru')
{
	$langInfo = stemming_init($sLang);
	$delimiter = ' ';

	$sText = stemming_upper($sText, $sLang);
	$sText = preg_replace('/[^' . $langInfo['pcre_letters'] . ']/u', $delimiter, $sText);

	$positions = [];
	for ($token = strtok($sText, $delimiter); $token !== false; $token = strtok($delimiter))
	{
		$word = mb_substr($token, 0, 100);

		//Only the first occurrence of the word is remembered
		if (isset($positions[$word]))
		{
			continue;
		}

		$positions[$word] = mb_strpos($sText, $word);
	}

	return $positions;
}

/**
 * Stems all the words of the text.
 *
 * The text is upper-cased, characters outside of the language
 * 'pcre_letters' class become delimiters and each word is cut to
 * 50 characters before it is passed to the language stem function.
 * When the stem equals the word and the word has characters outside of the
 * language 'pcre_abc' class, stemming_detect() is asked to try other languages.
 *
 * Result values are accumulated per stem. The position counter starts
 * at 1 and moves only when $bReturnPositions is true (+1 for each word,
 * +5 for each sentence break), so without positions the value is the number
 * of occurrences, and with positions it is the sum of the positions.
 *
 * @param string $sText Text to process.
 * @param string $sLang Language id.
 * @param bool $bIgnoreStopWords When true the stop function is not consulted.
 * @param bool $bReturnPositions When true '.', '!' and '?' are treated as sentence breaks.
 * @return array Map: stem => accumulated value.
 */
function stemming($sText, $sLang='ru', $bIgnoreStopWords = false, $bReturnPositions = false)
{
	//Results of the stop function are kept between the calls (per requested language)
	static $STOP_CACHE = [];
	if (!isset($STOP_CACHE[$sLang]))
	{
		$STOP_CACHE[$sLang] = [];
	}
	$stopCache = &$STOP_CACHE[$sLang];

	//Registry of all languages plus the requested one if it is missing there
	$allInfo = stemming_init(false);
	if (!isset($allInfo[$sLang]))
	{
		$allInfo[$sLang] = stemming_init($sLang);
	}
	$langInfo = $allInfo[$sLang];

	$stemFunc = $langInfo['stem'];
	//Matches characters which are not from the language alphabet
	$notAbcPattern = '/[^' . $langInfo['pcre_abc'] . ']+/u';

	//Normalize the text: upper case, words separated by the delimiter
	$delimiter = ' ';
	$sText = stemming_upper($sText, $sLang);
	if ($bReturnPositions)
	{
		//Sentence terminators are preserved and unified to '.'
		$sText = preg_replace('/[^' . $langInfo['pcre_letters'] . '.!?]+/u', $delimiter, $sText);
		$sText = preg_replace('/[!?]+/u', '.', $sText);
	}
	else
	{
		$sText = preg_replace('/[^' . $langInfo['pcre_letters'] . ']+/u', $delimiter, $sText);
	}

	$result = [];
	$pos = 1;
	for ($token = strtok($sText, $delimiter); $token !== false; $token = strtok($delimiter))
	{
		//With positions one token may hold several sentences glued by '.'
		$parts = $bReturnPositions ? explode('.', $token) : [$token];

		foreach ($parts as $i => $part)
		{
			$word = mb_substr($part, 0, 50);

			if ($bReturnPositions)
			{
				if ($i > 0)
				{
					$pos += 5; //Sentence distance
				}
				if ($word == '')
				{
					continue;
				}
			}

			//Second argument 1 means the stem function may return more than one word
			$stem = $stemFunc($word, 1);
			$stopLang = $sLang;

			//Word was left intact and has foreign characters: try other languages
			if (
				!is_array($stem)
				&& $stem === $word
				&& preg_match($notAbcPattern, $word)
			)
			{
				[$guessStem, $guessLang] = stemming_detect($word, $allInfo, $sLang);
				if ($guessStem != '')
				{
					$stem = $guessStem;
					$stopLang = $guessLang;
				}
			}

			//Stop function is looked up only when it is going to be used
			$stopFunc = $bIgnoreStopWords ? null : $allInfo[$stopLang]['stop'];

			foreach (is_array($stem) ? $stem : [$stem] as $st)
			{
				if (!$bIgnoreStopWords)
				{
					if (!isset($stopCache[$st]))
					{
						$stopCache[$st] = $stopFunc($st);
					}

					//Falsy result of the stop function means the stem has to be dropped
					if (!$stopCache[$st])
					{
						continue;
					}
				}

				$result[$st] = ($result[$st] ?? 0) + $pos;
			}

			if ($bReturnPositions)
			{
				$pos++;
			}
		}
	}

	return $result;
}

/**
 * Tries to stem the word with languages other than $skipLang.
 *
 * Languages are tried in the order of $arStemInfo. A language is passed over
 * when the word has characters outside of its 'pcre_abc' class. The search
 * stops at the first language whose stem function changes the word or whose
 * stop function returns a falsy value for it. Otherwise the result of
 * the last language tried is returned.
 *
 * @param string $word Word to stem.
 * @param array $arStemInfo Map: language id => entry with 'pcre_abc', 'stem' and 'stop' keys.
 * @param string $skipLang Language id which must not be tried.
 * @return array [stem, language id]; both are empty strings if no language was tried.
 */
function stemming_detect($word, $arStemInfo, $skipLang)
{
	$best = ['', ''];

	foreach ($arStemInfo as $candidateLang => $info)
	{
		//Skip the excluded language and languages whose alphabet does not cover the word
		if (
			$candidateLang === $skipLang
			|| preg_match('/[^' . $info['pcre_abc'] . ']+/u', $word)
		)
		{
			continue;
		}

		$candidateStem = $info['stem']($word);
		$best = [$candidateStem, $candidateLang];

		//Either stemming changed the word or the stop function rejected it:
		//no need to look any further
		if ($candidateStem !== $word || !$info['stop']($candidateStem))
		{
			break;
		}
	}

	//This is the best guess: the stem and the language to use for the stop check
	return $best;
}

/**
 * Default 'upper' callback: multibyte aware upper-casing via mb_strtoupper().
 *
 * @param string $sText Text to convert.
 * @return string Upper-cased text.
 */
function stemming_upper_default($sText)
{
	$upperCased = mb_strtoupper($sText);

	return $upperCased;
}

/**
 * Default 'stem' callback: leaves the word as it is.
 *
 * @param string $sText Word to stem.
 * @return string The same word.
 */
function stemming_default($sText)
{
	$stem = $sText;

	return $stem;
}

/**
 * Default 'stop' callback.
 *
 * @param string $sWord Word to check.
 * @return bool False when the word is shorter than 2 characters, true otherwise.
 */
function stemming_stop_default($sWord)
{
	return mb_strlen($sWord) >= 2;
}

/**
 * Default letter set: latin letters in both cases followed by the digits.
 *
 * @return string List of the characters.
 */
function stemming_letter_default()
{
	$defaultLetters = 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM0123456789';

	return $defaultLetters;
}

Youez - 2016 - github.com/yon3zu
LinuXploit