[dev] [commit] r1004 - wwwbase
automailer at dexonline.ro
automailer at dexonline.ro
Mon Sep 23 09:53:22 EEST 2013
Author: alinu
Date: Mon Sep 23 09:53:21 2013
New Revision: 1004
Log:
diacritice.php - inainte consideram ca textul dat ca input e fara diacritice, deci tot ce nu era ctype_alpha sau '-' era considerat separator, inclusiv diacriticele. Exemplu:
input: specificațiile
output: specificățiile
deoarece ț era separator, a facut match pe 'specifică'
Acum consider separator tot ce nu satisface (ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-').
Modified:
wwwbase/diacritice.php
Modified: wwwbase/diacritice.php
==============================================================================
--- wwwbase/diacritice.php Mon Sep 23 09:15:44 2013 (r1003)
+++ wwwbase/diacritice.php Mon Sep 23 09:53:21 2013 (r1004)
@@ -46,7 +46,7 @@
self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
self::$nonLowerDiacritics = pref_getSectionPreference("crawler", "non_lower_diacritics");
- self::$nonUpperDiacritics = pref_getSectionPreference("crawler", "non_upper_diacritics");
+ self::$nonUpperDiacritics = mb_strtoupper(self::$nonLowerDiacritics);
self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
$this->selectCount = 0;
@@ -72,7 +72,7 @@
static function isSeparator($ch) {
crawlerLog("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line '.__LINE__ );
- return !(ctype_alpha($ch) || $ch == '-');
+ return !(ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-');
}
@@ -180,10 +180,10 @@
if ($tableObj != null) {
crawlerLog("Entry Exists");
$ch = $this->getAllCharForms($tableObj, $middle);
+ $textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
+ $this->resultText .= $textSubstr;
- $this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
-
- $this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
+ $this->hiddenText .= $textSubstr;
$this->resultText .= $ch;
@@ -197,10 +197,10 @@
}
else {
+ $textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
+ $this->resultText .= $textSubstr;
- $this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
-
- $this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
+ $this->hiddenText .= $textSubstr;
}
$this->lastOffset = $this->currOffset;
More information about the Dev
mailing list