[dev] [commit] r984 - phplib/models wwwbase/Crawler

automailer at dexonline.ro automailer at dexonline.ro
Fri Sep 13 19:06:02 EEST 2013


Author: alinu
Date: Fri Sep 13 19:06:02 2013
New Revision: 984

Log:
Această versiune are models/FilesUsedInDiacritics.php și tabelele pentru diacritice.
Nu este importantă revizuirea ei, deoarece am uploadat-o pentru a nu o pierde.

Added:
   phplib/models/FilesUsedInDiacritics.php
   wwwbase/Crawler/DiacriticsTables.sql
Modified:
   phplib/models/Diacritics.php
   wwwbase/Crawler/DiacriticsBuilder.php

Modified: phplib/models/Diacritics.php
==============================================================================
--- phplib/models/Diacritics.php	Thu Sep 12 09:03:25 2013	(r983)
+++ phplib/models/Diacritics.php	Fri Sep 13 19:06:02 2013	(r984)
@@ -36,23 +36,42 @@
 	//inlocuieste diactriticele
 	private static function stripDiacritics($str) {
 
-		return $str;
-
-		
 		$strippedStr = '';
-		$strArray = str_split($str, 1);
 		
-		foreach($strArray as $ch) {
+		$currOffset = 0;
+		$finalOffset = strlen($str) - 1;
+		
+		while($currOffset <= $finalOffset) {
+
+			$ch = '';
+			if ($currOffset == $finalOffset) {
+
+				$ch = substr($str, $currOffset, 1);
+				$currOffset ++;
+			}
+			else {
+
+				$ch = substr($str, $currOffset, 2);
+				if (strstr('ăâîșț', $ch)) {
+
+					$currOffset += 2;
+				}
+				else {
+
+					$ch = substr($str, $currOffset, 1);
+					$currOffset ++;
+				}
+			}
 
 			$strippedStr .= self::replaceDiacritic($ch);
 		}
 
-
 		return $strippedStr;
 	}
 
 
-	public static function save2Db($before, $middle, $after) {
+
+	public function insertRow($before, $middle, $after, $diacritic) {
 
 		try {
 			
@@ -60,9 +79,9 @@
 			$tableObj->create();
 
 
-			$tableObj->before = self::stripDiacritics($before);
-			$tableObj->middle = self::stripDiacritics($middle);
-			$tableObj->after = self::stripDiacritics($after);
+			$tableObj->before = $before;
+			$tableObj->middle = $middle;
+			$tableObj->after = $after;
 			
 			
 			$tableObj->defaultForm = '0';
@@ -90,6 +109,49 @@
 			logException($ex);
 		}
 	}
+
+	
+
+
+	public static function updateRow($before, $middle, $after) {
+
+		return false;
+	}
+
+
+	public static function entryExists($before, $middle, $after) {
+		
+		return false;
+		$foundEntry = Model::factory(self::$_table)->raw_query("Select id from self::$_table where
+				 before = '$before' and middle = '$middle' and after = '$after';")->find_one();
+		if ($foundEntry) {
+
+			return true;
+		}
+
+		return false;
+	}
+
+
+	public static function save2Db($before, $middle, $after) {
+
+		$diacritic = substr($middle, 0);
+
+		$before = self::stripDiacritics($before);
+		$middle = self::stripDiacritics($middle);
+		$after = self::stripDiacritics($after);
+			
+
+			
+		if (self::entryExists($before, $middle, $after)) {
+
+			self::updateRow($before, $middle, $after, $diacritic);
+		}
+		else {
+
+			self::insertRow($before, $middle, $after, $diacritic);
+		}
+	}
 }
 
 ?>

Added: phplib/models/FilesUsedInDiacritics.php
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ phplib/models/FilesUsedInDiacritics.php	Fri Sep 13 19:06:02 2013	(r984)
@@ -0,0 +1,23 @@
+<?php
+
+
+class FilesUsedInDiacritics  extends BaseObject implements DatedObject {
+	
+	public static $_table = 'FilesUsedInDiacritics';
+
+	public static function save2Db($fileId) {
+
+		try {
+			$tableObj = Model::factory(self::$_table);
+			$tableObj->create();
+			$tableObj->fileId = $fileId;
+			$tableObj->save();
+		}
+		catch(Exception $ex) {
+
+			logException($ex);
+		}
+	}
+}
+
+?>
\ No newline at end of file

Modified: wwwbase/Crawler/DiacriticsBuilder.php
==============================================================================
--- wwwbase/Crawler/DiacriticsBuilder.php	Thu Sep 12 09:03:25 2013	(r983)
+++ wwwbase/Crawler/DiacriticsBuilder.php	Fri Sep 13 19:06:02 2013	(r984)
@@ -29,6 +29,10 @@
 	private static $diacritics;
 	private static $nonDiacritics;
 	private static $paddingNumber;
+	private static $paddingChar;
+	private $globalCount;
+	private $localCount;
+	private $currentDir;
 	/*
 	 * initialises instance variables
 	 */
@@ -38,6 +42,9 @@
 		self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
 		self::$nonDiacritics = pref_getSectionPreference("crawler", "non_diacritics");
 		self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
+		self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
+
+		$this->globalCount = 0;
  	}
 
 	/* 
@@ -149,7 +156,7 @@
 
 			if ($infPadding) {
 
-				$before = '*' . $before;
+				$before = self::$paddingChar . $before;
 			}
 			else {
 
@@ -160,7 +167,7 @@
 					if ($this->isSeparator($infCh)) {
 
 						$infPadding = true;
-						$before = '*' . $before;
+						$before = self::$paddingChar . $before;
 					}
 					else {
 
@@ -181,7 +188,7 @@
 					if ($this->isSeparator($infCh)) {
 
 						$infPadding = true;
-						$before = '*' . $before;
+						$before = self::$paddingChar . $before;
 					}
 					else {
 
@@ -210,7 +217,7 @@
 
 			if ($supPadding) {
 
-				$after .= '*';
+				$after .= self::$paddingChar;
 			}
 			else {
 
@@ -221,7 +228,7 @@
 					if ($this->isSeparator($infCh)) {
 
 						$supPadding = true;
-						$after = '*' . $after;
+						$after = self::$paddingChar . $after;
 					}
 					else {
 
@@ -242,7 +249,7 @@
 					if ($this->isSeparator($supCh)) {
 
 						$supPadding = true;
-						$after .= '*';
+						$after .= self::$paddingChar;
 					}
 					else {
 
@@ -269,7 +276,7 @@
 
 			if ($inferiorOffset < 0) {
 				
-				$before = '*' . $before;
+				$before = self::$paddingChar . $before;
 			}
 			else {
 				
@@ -280,12 +287,12 @@
 					$inferiorSeparator = $this->isSeparator($ch);
 				}
 
-				$before = ($inferiorSeparator ? '*' : $ch) . $before;
+				$before = ($inferiorSeparator ? self::$paddingChar : $ch) . $before;
 			}
 
 			if ($superiorOffset > $this->fileEndOffset) {
 				
-				$after .= '*';
+				$after .= self::$paddingChar;
 			}
 			else {
 
@@ -297,7 +304,7 @@
 					$superiorSeparator = $this->isSeparator($ch);
 				}
 
-				$after .= ($superiorSeparator ? '*' : $ch);
+				$after .= ($superiorSeparator ? self::$paddingChar : $ch);
 			}
 
 		}

Added: wwwbase/Crawler/DiacriticsTables.sql
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ wwwbase/Crawler/DiacriticsTables.sql	Fri Sep 13 19:06:02 2013	(r984)
@@ -0,0 +1,32 @@
+
+--
+-- Table structure for table `Diacritics`
+--
+
+CREATE TABLE IF NOT EXISTS `Diacritics` (
+  `id` bigint(20) NOT NULL AUTO_INCREMENT,
+  `before` varchar(10) NOT NULL,
+  `middle` varchar(2) NOT NULL,
+  `after` varchar(10) NOT NULL,
+  `defaultForm` int(11) NOT NULL,
+  `curvedForm` int(11) NOT NULL,
+  `circumflexForm` int(11) NOT NULL,
+  `createDate` int(11) DEFAULT NULL,
+  `modDate` int(11) DEFAULT NULL,
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
+
+
+--
+-- Table structure for table `FilesUsedInDiacritics`
+--
+
+CREATE TABLE IF NOT EXISTS `FilesUsedInDiacritics` (
+  `fileId` int(11) NOT NULL,
+  `createDate` int(11) DEFAULT NULL,
+  `modDate` int(11) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;


More information about the Dev mailing list