[dev] [commit] r982 - wwwbase/Crawler

automailer at dexonline.ro automailer at dexonline.ro
Wed Sep 11 23:28:44 EEST 2013


Author: alinu
Date: Wed Sep 11 23:28:44 2013
New Revision: 982

Log:


Modified:
   wwwbase/Crawler/DiacriticsBuilder.php

Modified: wwwbase/Crawler/DiacriticsBuilder.php
==============================================================================
--- wwwbase/Crawler/DiacriticsBuilder.php	Wed Sep 11 21:03:41 2013	(r981)
+++ wwwbase/Crawler/DiacriticsBuilder.php	Wed Sep 11 23:28:44 2013	(r982)
@@ -73,11 +73,24 @@
 		while($this->currOffset <= $this->fileEndOffset) {
 			//daca urmatorul offset e a,i,s,t sau ă,â,î,ș,ț
 			$ch = substr($this->file, $this->currOffset, 1);
-			if (strstr(self::$nonDiacritics, $ch) ||
-				strstr(self::$diacritics, $ch)) {
+			if (strstr(self::$nonDiacritics, $ch)) {
 				
-				return $this->currOffset++;
+				return $this->currOffset ++;
 			}
+			else {
+
+				$ch = substr($this->file, $this->currOffset, 2);
+
+				if (strstr(self::$diacritics, $ch)) {
+				
+					$this->currOffset += 2;
+
+					return $this->currOffset - 2;
+				}
+			}
+
+
+
 			//trecem la urmatorul caracter
 			$this->currOffset ++;
 		}
@@ -99,19 +112,93 @@
 			$middle = substr($this->file, $offset, 2);
 		else
 			$middle = substr($this->file, $offset, 1);
-		$after = '';
 
-		$inferiorSeparator = false;
-		$superiorSeparator = false;
+		$after = 'test';
+
+		$infPadding = false;
+		$supPadding = false;
 
 
 		//echo "OFFSET ".$offset. '  char '.substr($this->file, $offset, 1).PHP_EOL; 
 
 
+		$infOffset = $offset - 2;
+
+		$supOffset = $offset + strlen($middle);
+
+		$firstLetter = false;
+
 		for ($i = 0; $i < self::$paddingNumber; $i++) {
 
-			$inferiorOffset = $offset - 1 - $i;
-			$superiorOffset = $offset + 1 + $i;
+			if ($infOffset < 0) {
+
+				if ($infOffset + 1 == 0) {
+
+					$firstLetter = true;
+				}
+				else {
+
+					$infPadding = true;
+				}
+			}
+
+
+
+			if ($infPadding) {
+
+				$before = '*' . $before;
+			}
+			else {
+
+				if ($firstLetter) {
+
+					$infCh = substr($this->file, $infOffset, 1);
+
+					if ($this->isSeparator($infCh)) {
+
+						$infPadding = true;
+						$before = '*' . $before;
+					}
+					else {
+
+						$before = $infCh . $before;
+					}
+
+				}
+
+				else {
+
+					$infCh = substr($this->file, $infOffset, 2);
+
+					if (!strstr(self::$diacritics, $infCh)) {
+						$infOffset ++;
+						$infCh = substr($this->file, $infOffset, 1);
+					}
+
+					if ($this->isSeparator($infCh)) {
+
+						$infPadding = true;
+						$before = '*' . $before;
+					}
+					else {
+						$before = $infCh . $before;
+
+						$infOffset -= 2;
+					}
+				}
+			}
+		}
+/*
+			$supCh = substr($this->file, $superiorOffset, 2);
+
+			if (!strstr(self::$diacritics, $supCh)) {
+
+				$supCh = substr($this->file, $superiorOffset, 1);
+			}
+
+			
+
+
 
 			if ($inferiorOffset < 0) {
 				
@@ -149,7 +236,7 @@
 		}
 
 		//echo "RESULT   $before|$middle|$after".PHP_EOL;
-
+*/
 
 		Diacritics::save2Db($before, $middle, $after);
 


More information about the Dev mailing list