[dev] [commit] r984 - phplib/models wwwbase/Crawler

Cătălin Frâncu cata at francu.com
Wed Sep 18 19:23:02 EEST 2013


E-n regulă!
Cătălin

On 2013-09-13 09:06, automailer at dexonline.ro wrote:
> Author: alinu
> Date: Fri Sep 13 19:06:02 2013
> New Revision: 984
> 
> Log:
> Aceasta versiune are models/FilesUsedInDiacritics.php si tabelele
> pentru diacritice.
> Nu este importanta revizuirea ei deoarece am uploadat-o pentru a nu o 
> pierde.
> 
> Added:
>    phplib/models/FilesUsedInDiacritics.php
>    wwwbase/Crawler/DiacriticsTables.sql
> Modified:
>    phplib/models/Diacritics.php
>    wwwbase/Crawler/DiacriticsBuilder.php
> 
> Modified: phplib/models/Diacritics.php
> ==============================================================================
> --- phplib/models/Diacritics.php	Thu Sep 12 09:03:25 2013	(r983)
> +++ phplib/models/Diacritics.php	Fri Sep 13 19:06:02 2013	(r984)
> @@ -36,23 +36,42 @@
>  	//inlocuieste diactriticele
>  	private static function stripDiacritics($str) {
> 
> -		return $str;
> -
> -
>  		$strippedStr = '';
> -		$strArray = str_split($str, 1);
> 
> -		foreach($strArray as $ch) {
> +		$currOffset = 0;
> +		$finalOffset = strlen($str) - 1;
> +
> +		while($currOffset <= $finalOffset) {
> +
> +			$ch = '';
> +			if ($currOffset == $finalOffset) {
> +
> +				$ch = substr($str, $currOffset, 1);
> +				$currOffset ++;
> +			}
> +			else {
> +
> +				$ch = substr($str, $currOffset, 2);
> +				if (strstr('ăâîșț', $ch)) {
> +
> +					$currOffset += 2;
> +				}
> +				else {
> +
> +					$ch = substr($str, $currOffset, 1);
> +					$currOffset ++;
> +				}
> +			}
> 
>  			$strippedStr .= self::replaceDiacritic($ch);
>  		}
> 
> -
>  		return $strippedStr;
>  	}
> 
> 
> -	public static function save2Db($before, $middle, $after) {
> +
> +	public function insertRow($before, $middle, $after, $diacritic) {
> 
>  		try {
> 
> @@ -60,9 +79,9 @@
>  			$tableObj->create();
> 
> 
> -			$tableObj->before = self::stripDiacritics($before);
> -			$tableObj->middle = self::stripDiacritics($middle);
> -			$tableObj->after = self::stripDiacritics($after);
> +			$tableObj->before = $before;
> +			$tableObj->middle = $middle;
> +			$tableObj->after = $after;
> 
> 
>  			$tableObj->defaultForm = '0';
> @@ -90,6 +109,49 @@
>  			logException($ex);
>  		}
>  	}
> +
> +
> +
> +
> +	public static function updateRow($before, $middle, $after) {
> +
> +		return false;
> +	}
> +
> +
> +	public static function entryExists($before, $middle, $after) {
> +
> +		return false;
> +		$foundEntry = Model::factory(self::$_table)->raw_query("Select id from self::$_table where
> +				 before = '$before' and middle = '$middle' and after = '$after';")->find_one();
> +		if ($foundEntry) {
> +
> +			return true;
> +		}
> +
> +		return false;
> +	}
> +
> +
> +	public static function save2Db($before, $middle, $after) {
> +
> +		$diacritic = substr($middle, 0);
> +
> +		$before = self::stripDiacritics($before);
> +		$middle = self::stripDiacritics($middle);
> +		$after = self::stripDiacritics($after);
> +
> +
> +
> +		if (self::entryExists($before, $middle, $after)) {
> +
> +			self::updateRow($before, $middle, $after, $diacritic);
> +		}
> +		else {
> +
> +			self::insertRow($before, $middle, $after, $diacritic);
> +		}
> +	}
>  }
> 
>  ?>
> 
> Added: phplib/models/FilesUsedInDiacritics.php
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ phplib/models/FilesUsedInDiacritics.php	Fri Sep 13 19:06:02 2013	(r984)
> @@ -0,0 +1,23 @@
> +<?php
> +
> +
> +class FilesUsedInDiacritics  extends BaseObject implements DatedObject {
> +
> +	public static $_table = 'FilesUsedInDiacritics';
> +
> +	public static function save2Db($fileId) {
> +
> +		try {
> +			$tableObj = Model::factory(self::$_table);
> +			$tableObj->create();
> +			$tableObj->fileId = $fileId;
> +			$tableObj->save();
> +		}
> +		catch(Exception $ex) {
> +
> +			logException($ex);
> +		}
> +	}
> +}
> +
> +?>
> \ No newline at end of file
> 
> Modified: wwwbase/Crawler/DiacriticsBuilder.php
> ==============================================================================
> --- wwwbase/Crawler/DiacriticsBuilder.php	Thu Sep 12 09:03:25 2013	(r983)
> +++ wwwbase/Crawler/DiacriticsBuilder.php	Fri Sep 13 19:06:02 2013	(r984)
> @@ -29,6 +29,10 @@
>  	private static $diacritics;
>  	private static $nonDiacritics;
>  	private static $paddingNumber;
> +	private static $paddingChar;
> +	private $globalCount;
> +	private $localCount;
> +	private $currentDir;
>  	/*
>  	 * initialises instance variables
>  	 */
> @@ -38,6 +42,9 @@
>  		self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
>  		self::$nonDiacritics = pref_getSectionPreference("crawler", "non_diacritics");
>  		self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
> +		self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
> +
> +		$this->globalCount = 0;
>   	}
> 
>  	/*
> @@ -149,7 +156,7 @@
> 
>  			if ($infPadding) {
> 
> -				$before = '*' . $before;
> +				$before = self::$paddingChar . $before;
>  			}
>  			else {
> 
> @@ -160,7 +167,7 @@
>  					if ($this->isSeparator($infCh)) {
> 
>  						$infPadding = true;
> -						$before = '*' . $before;
> +						$before = self::$paddingChar . $before;
>  					}
>  					else {
> 
> @@ -181,7 +188,7 @@
>  					if ($this->isSeparator($infCh)) {
> 
>  						$infPadding = true;
> -						$before = '*' . $before;
> +						$before = self::$paddingChar . $before;
>  					}
>  					else {
> 
> @@ -210,7 +217,7 @@
> 
>  			if ($supPadding) {
> 
> -				$after .= '*';
> +				$after .= self::$paddingChar;
>  			}
>  			else {
> 
> @@ -221,7 +228,7 @@
>  					if ($this->isSeparator($infCh)) {
> 
>  						$supPadding = true;
> -						$after = '*' . $after;
> +						$after = self::$paddingChar . $after;
>  					}
>  					else {
> 
> @@ -242,7 +249,7 @@
>  					if ($this->isSeparator($supCh)) {
> 
>  						$supPadding = true;
> -						$after .= '*';
> +						$after .= self::$paddingChar;
>  					}
>  					else {
> 
> @@ -269,7 +276,7 @@
> 
>  			if ($inferiorOffset < 0) {
> 
> -				$before = '*' . $before;
> +				$before = self::$paddingChar . $before;
>  			}
>  			else {
> 
> @@ -280,12 +287,12 @@
>  					$inferiorSeparator = $this->isSeparator($ch);
>  				}
> 
> -				$before = ($inferiorSeparator ? '*' : $ch) . $before;
> +				$before = ($inferiorSeparator ? self::$paddingChar : $ch) . $before;
>  			}
> 
>  			if ($superiorOffset > $this->fileEndOffset) {
> 
> -				$after .= '*';
> +				$after .= self::$paddingChar;
>  			}
>  			else {
> 
> @@ -297,7 +304,7 @@
>  					$superiorSeparator = $this->isSeparator($ch);
>  				}
> 
> -				$after .= ($superiorSeparator ? '*' : $ch);
> +				$after .= ($superiorSeparator ? self::$paddingChar : $ch);
>  			}
> 
>  		}
> 
> Added: wwwbase/Crawler/DiacriticsTables.sql
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ wwwbase/Crawler/DiacriticsTables.sql	Fri Sep 13 19:06:02 2013	(r984)
> @@ -0,0 +1,32 @@
> +
> +--
> +-- Table structure for table `Diacritics`
> +--
> +
> +CREATE TABLE IF NOT EXISTS `Diacritics` (
> +  `id` bigint(20) NOT NULL AUTO_INCREMENT,
> +  `before` varchar(10) NOT NULL,
> +  `middle` varchar(2) NOT NULL,
> +  `after` varchar(10) NOT NULL,
> +  `defaultForm` int(11) NOT NULL,
> +  `curvedForm` int(11) NOT NULL,
> +  `circumflexForm` int(11) NOT NULL,
> +  `createDate` int(11) DEFAULT NULL,
> +  `modDate` int(11) DEFAULT NULL,
> +  PRIMARY KEY (`id`)
> +) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
> +
> +
> +--
> +-- Table structure for table `FilesUsedInDiacritics`
> +--
> +
> +CREATE TABLE IF NOT EXISTS `FilesUsedInDiacritics` (
> +  `fileId` int(11) NOT NULL,
> +  `createDate` int(11) DEFAULT NULL,
> +  `modDate` int(11) DEFAULT NULL
> +) ENGINE=InnoDB DEFAULT CHARSET=utf8;
> +
> +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
> +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
> +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
> _______________________________________________
> Dev mailing list
> Dev at dexonline.ro
> http://list.dexonline.ro/listinfo/dev


More information about the Dev mailing list