[dev] [commit] r1004 - wwwbase
Cătălin Frâncu
cata at francu.com
Tue Sep 24 19:54:57 EEST 2013
De acord!
Cătălin
On 09/22/2013 11:53 PM, automailer at dexonline.ro wrote:
> Author: alinu
> Date: Mon Sep 23 09:53:21 2013
> New Revision: 1004
>
> Log:
> diacritice.php - inainte consideram ca textul dat ca input e fara diacritice; astfel, tot ce nu era ctype_alpha sau '-' era considerat separator, deci si diacriticele erau separatori. Exemplu:
> input: specificațiile
> output: specificățiile
> deoarece ț era separator, a facut match pe 'specifică'
>
> Acum consider separator tot ce nu satisface (ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-').
>
> Modified:
> wwwbase/diacritice.php
>
> Modified: wwwbase/diacritice.php
> ==============================================================================
> --- wwwbase/diacritice.php Mon Sep 23 09:15:44 2013 (r1003)
> +++ wwwbase/diacritice.php Mon Sep 23 09:53:21 2013 (r1004)
> @@ -46,7 +46,7 @@
>
> self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
> self::$nonLowerDiacritics = pref_getSectionPreference("crawler", "non_lower_diacritics");
> - self::$nonUpperDiacritics = pref_getSectionPreference("crawler", "non_upper_diacritics");
> + self::$nonUpperDiacritics = mb_strtoupper(self::$nonLowerDiacritics);
> self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
> self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
> $this->selectCount = 0;
> @@ -72,7 +72,7 @@
>
> static function isSeparator($ch) {
> crawlerLog("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line '.__LINE__ );
> - return !(ctype_alpha($ch) || $ch == '-');
> + return !(ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-');
> }
>
>
> @@ -180,10 +180,10 @@
> if ($tableObj != null) {
> crawlerLog("Entry Exists");
> $ch = $this->getAllCharForms($tableObj, $middle);
> + $textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
> + $this->resultText .= $textSubstr;
>
> - $this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
> -
> - $this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
> + $this->hiddenText .= $textSubstr;
>
> $this->resultText .= $ch;
>
> @@ -197,10 +197,10 @@
>
> }
> else {
> + $textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
> + $this->resultText .= $textSubstr;
>
> - $this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
> -
> - $this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
> + $this->hiddenText .= $textSubstr;
> }
>
> $this->lastOffset = $this->currOffset;
> _______________________________________________
> Dev mailing list
> Dev at dexonline.ro
> http://list.dexonline.ro/listinfo/dev
>
More information about the Dev mailing list