[dev] [commit] r1004 - wwwbase

Cătălin Frâncu cata at francu.com
Tue Sep 24 19:54:57 EEST 2013


De acord!

Cătălin

On 09/22/2013 11:53 PM, automailer at dexonline.ro wrote:
> Author: alinu
> Date: Mon Sep 23 09:53:21 2013
> New Revision: 1004
>
> Log:
> diacritice.php - inainte consideram ca textul dat ca input e fara diacritice, deci tot ce nu era ctype_alpha sau '-' era considerat separator; prin urmare si diacriticele erau tratate ca separatori. Exemplu:
> input: specificațiile
> output: specificățiile
> deoarece ț era separator, a facut match pe 'specifică'
>
> Acum consider separator: !(ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-');
>
> Modified:
>     wwwbase/diacritice.php
>
> Modified: wwwbase/diacritice.php
> ==============================================================================
> --- wwwbase/diacritice.php	Mon Sep 23 09:15:44 2013	(r1003)
> +++ wwwbase/diacritice.php	Mon Sep 23 09:53:21 2013	(r1004)
> @@ -46,7 +46,7 @@
>
>   		self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
>   		self::$nonLowerDiacritics = pref_getSectionPreference("crawler", "non_lower_diacritics");
> -		self::$nonUpperDiacritics = pref_getSectionPreference("crawler", "non_upper_diacritics");
> +		self::$nonUpperDiacritics = mb_strtoupper(self::$nonLowerDiacritics);
>   		self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
>   		self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
>   		$this->selectCount = 0;
> @@ -72,7 +72,7 @@
>
>   	static function isSeparator($ch) {
>   		crawlerLog("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line '.__LINE__ );
> -		return !(ctype_alpha($ch) || $ch == '-');
> +		return !(ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-');
>   	}
>
>
> @@ -180,10 +180,10 @@
>   		if ($tableObj != null) {
>   			crawlerLog("Entry Exists");
>   			$ch = $this->getAllCharForms($tableObj, $middle);
> +			$textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
> +			$this->resultText .= $textSubstr;
>
> -			$this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
> -
> -			$this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
> +			$this->hiddenText .= $textSubstr;
>
>   			$this->resultText .= $ch;
>
> @@ -197,10 +197,10 @@
>
>   		}
>   		else {
> +			$textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
> +			$this->resultText .= $textSubstr;
>
> -			$this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
> -
> -			$this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);			
> +			$this->hiddenText .= $textSubstr;
>   		}
>
>   		$this->lastOffset = $this->currOffset;
> _______________________________________________
> Dev mailing list
> Dev at dexonline.ro
> http://list.dexonline.ro/listinfo/dev
>


More information about the Dev mailing list