[dev] [commit] r1004 - wwwbase

automailer at dexonline.ro automailer at dexonline.ro
Mon Sep 23 09:53:22 EEST 2013


Author: alinu
Date: Mon Sep 23 09:53:21 2013
New Revision: 1004

Log:
diacritice.php - inainte consideram ca textul dat ca input e fara diacritice; astfel, tot ce nu era ctype_alpha sau '-' era considerat separator, deci si diacriticele erau separatori. Exemplu:
input: specificațiile
output: specificățiile
deoarece ț era separator, a facut match pe 'specifică'

Acum consider !(ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-');

Modified:
   wwwbase/diacritice.php

Modified: wwwbase/diacritice.php
==============================================================================
--- wwwbase/diacritice.php	Mon Sep 23 09:15:44 2013	(r1003)
+++ wwwbase/diacritice.php	Mon Sep 23 09:53:21 2013	(r1004)
@@ -46,7 +46,7 @@
 
 		self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
 		self::$nonLowerDiacritics = pref_getSectionPreference("crawler", "non_lower_diacritics");
-		self::$nonUpperDiacritics = pref_getSectionPreference("crawler", "non_upper_diacritics");
+		self::$nonUpperDiacritics = mb_strtoupper(self::$nonLowerDiacritics);
 		self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
 		self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
 		$this->selectCount = 0;
@@ -72,7 +72,7 @@
 
 	static function isSeparator($ch) {
 		crawlerLog("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line '.__LINE__ );
-		return !(ctype_alpha($ch) || $ch == '-');
+		return !(ctype_alpha(StringUtil::unicodeToLatin($ch)) || $ch == '-');
 	}
 
 
@@ -180,10 +180,10 @@
 		if ($tableObj != null) {
 			crawlerLog("Entry Exists");
 			$ch = $this->getAllCharForms($tableObj, $middle);
+			$textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
+			$this->resultText .= $textSubstr;
 
-			$this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
-
-			$this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
+			$this->hiddenText .= $textSubstr;
 
 			$this->resultText .= $ch;
 
@@ -197,10 +197,10 @@
 
 		}
 		else {
+			$textSubstr = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
+			$this->resultText .= $textSubstr;
 
-			$this->resultText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
-
-			$this->hiddenText .= mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);			
+			$this->hiddenText .= $textSubstr;
 		}
 
 		$this->lastOffset = $this->currOffset;


More information about the Dev mailing list