[dev] [commit] r984 - phplib/models wwwbase/Crawler
Cătălin Frâncu
cata at francu.com
Wed Sep 18 19:23:02 EEST 2013
E-n regulă!
Cătălin
On 2013-09-13 09:06, automailer at dexonline.ro wrote:
> Author: alinu
> Date: Fri Sep 13 19:06:02 2013
> New Revision: 984
>
> Log:
> Aceasta versiune are models/FilesUsedInDiacritics.php si tabelele
> pentru diacritice.
> Nu este importanta revizuirea ei deoarece am uploadat-o pentru a nu o
> pierde.
>
> Added:
> phplib/models/FilesUsedInDiacritics.php
> wwwbase/Crawler/DiacriticsTables.sql
> Modified:
> phplib/models/Diacritics.php
> wwwbase/Crawler/DiacriticsBuilder.php
>
> Modified: phplib/models/Diacritics.php
> ==============================================================================
> --- phplib/models/Diacritics.php Thu Sep 12 09:03:25 2013 (r983)
> +++ phplib/models/Diacritics.php Fri Sep 13 19:06:02 2013 (r984)
> @@ -36,23 +36,42 @@
> //inlocuieste diactriticele
> private static function stripDiacritics($str) {
>
> - return $str;
> -
> -
> $strippedStr = '';
> - $strArray = str_split($str, 1);
>
> - foreach($strArray as $ch) {
> + $currOffset = 0;
> + $finalOffset = strlen($str) - 1;
> +
> + while($currOffset <= $finalOffset) {
> +
> + $ch = '';
> + if ($currOffset == $finalOffset) {
> +
> + $ch = substr($str, $currOffset, 1);
> + $currOffset ++;
> + }
> + else {
> +
> + $ch = substr($str, $currOffset, 2);
> + if (strstr('ăâîșț', $ch)) {
> +
> + $currOffset += 2;
> + }
> + else {
> +
> + $ch = substr($str, $currOffset, 1);
> + $currOffset ++;
> + }
> + }
>
> $strippedStr .= self::replaceDiacritic($ch);
> }
>
> -
> return $strippedStr;
> }
>
>
> - public static function save2Db($before, $middle, $after) {
> +
> + public function insertRow($before, $middle, $after, $diacritic) {
>
> try {
>
> @@ -60,9 +79,9 @@
> $tableObj->create();
>
>
> - $tableObj->before = self::stripDiacritics($before);
> - $tableObj->middle = self::stripDiacritics($middle);
> - $tableObj->after = self::stripDiacritics($after);
> + $tableObj->before = $before;
> + $tableObj->middle = $middle;
> + $tableObj->after = $after;
>
>
> $tableObj->defaultForm = '0';
> @@ -90,6 +109,49 @@
> logException($ex);
> }
> }
> +
> +
> +
> +
> + public static function updateRow($before, $middle, $after) {
> +
> + return false;
> + }
> +
> +
> + public static function entryExists($before, $middle, $after) {
> +
> + return false;
> + $foundEntry = Model::factory(self::$_table)->raw_query("Select id from self::$_table where
> + before = '$before' and middle = '$middle' and after = '$after';")->find_one();
> + if ($foundEntry) {
> +
> + return true;
> + }
> +
> + return false;
> + }
> +
> +
> + public static function save2Db($before, $middle, $after) {
> +
> + $diacritic = substr($middle, 0);
> +
> + $before = self::stripDiacritics($before);
> + $middle = self::stripDiacritics($middle);
> + $after = self::stripDiacritics($after);
> +
> +
> +
> + if (self::entryExists($before, $middle, $after)) {
> +
> + self::updateRow($before, $middle, $after, $diacritic);
> + }
> + else {
> +
> + self::insertRow($before, $middle, $after, $diacritic);
> + }
> + }
> }
>
> ?>
>
> Added: phplib/models/FilesUsedInDiacritics.php
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ phplib/models/FilesUsedInDiacritics.php Fri Sep 13 19:06:02 2013 (r984)
> @@ -0,0 +1,23 @@
> +<?php
> +
> +
> +class FilesUsedInDiacritics extends BaseObject implements DatedObject {
> +
> + public static $_table = 'FilesUsedInDiacritics';
> +
> + public static function save2Db($fileId) {
> +
> + try {
> + $tableObj = Model::factory(self::$_table);
> + $tableObj->create();
> + $tableObj->fileId = $fileId;
> + $tableObj->save();
> + }
> + catch(Exception $ex) {
> +
> + logException($ex);
> + }
> + }
> +}
> +
> +?>
> \ No newline at end of file
>
> Modified: wwwbase/Crawler/DiacriticsBuilder.php
> ==============================================================================
> --- wwwbase/Crawler/DiacriticsBuilder.php Thu Sep 12 09:03:25 2013 (r983)
> +++ wwwbase/Crawler/DiacriticsBuilder.php Fri Sep 13 19:06:02 2013 (r984)
> @@ -29,6 +29,10 @@
> private static $diacritics;
> private static $nonDiacritics;
> private static $paddingNumber;
> + private static $paddingChar;
> + private $globalCount;
> + private $localCount;
> + private $currentDir;
> /*
> * initialises instance variables
> */
> @@ -38,6 +42,9 @@
> self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
> self::$nonDiacritics = pref_getSectionPreference("crawler", "non_diacritics");
> self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
> + self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
> +
> + $this->globalCount = 0;
> }
>
> /*
> @@ -149,7 +156,7 @@
>
> if ($infPadding) {
>
> - $before = '*' . $before;
> + $before = self::$paddingChar . $before;
> }
> else {
>
> @@ -160,7 +167,7 @@
> if ($this->isSeparator($infCh)) {
>
> $infPadding = true;
> - $before = '*' . $before;
> + $before = self::$paddingChar . $before;
> }
> else {
>
> @@ -181,7 +188,7 @@
> if ($this->isSeparator($infCh)) {
>
> $infPadding = true;
> - $before = '*' . $before;
> + $before = self::$paddingChar . $before;
> }
> else {
>
> @@ -210,7 +217,7 @@
>
> if ($supPadding) {
>
> - $after .= '*';
> + $after .= self::$paddingChar;
> }
> else {
>
> @@ -221,7 +228,7 @@
> if ($this->isSeparator($infCh)) {
>
> $supPadding = true;
> - $after = '*' . $after;
> + $after = self::$paddingChar . $after;
> }
> else {
>
> @@ -242,7 +249,7 @@
> if ($this->isSeparator($supCh)) {
>
> $supPadding = true;
> - $after .= '*';
> + $after .= self::$paddingChar;
> }
> else {
>
> @@ -269,7 +276,7 @@
>
> if ($inferiorOffset < 0) {
>
> - $before = '*' . $before;
> + $before = self::$paddingChar . $before;
> }
> else {
>
> @@ -280,12 +287,12 @@
> $inferiorSeparator = $this->isSeparator($ch);
> }
>
> - $before = ($inferiorSeparator ? '*' : $ch) . $before;
> + $before = ($inferiorSeparator ? self::$paddingChar : $ch) . $before;
> }
>
> if ($superiorOffset > $this->fileEndOffset) {
>
> - $after .= '*';
> + $after .= self::$paddingChar;
> }
> else {
>
> @@ -297,7 +304,7 @@
> $superiorSeparator = $this->isSeparator($ch);
> }
>
> - $after .= ($superiorSeparator ? '*' : $ch);
> + $after .= ($superiorSeparator ? self::$paddingChar : $ch);
> }
>
> }
>
> Added: wwwbase/Crawler/DiacriticsTables.sql
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ wwwbase/Crawler/DiacriticsTables.sql Fri Sep 13 19:06:02 2013 (r984)
> @@ -0,0 +1,32 @@
> +
> +--
> +-- Table structure for table `Diacritics`
> +--
> +
> +CREATE TABLE IF NOT EXISTS `Diacritics` (
> + `id` bigint(20) NOT NULL AUTO_INCREMENT,
> + `before` varchar(10) NOT NULL,
> + `middle` varchar(2) NOT NULL,
> + `after` varchar(10) NOT NULL,
> + `defaultForm` int(11) NOT NULL,
> + `curvedForm` int(11) NOT NULL,
> + `circumflexForm` int(11) NOT NULL,
> + `createDate` int(11) DEFAULT NULL,
> + `modDate` int(11) DEFAULT NULL,
> + PRIMARY KEY (`id`)
> +) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
> +
> +
> +--
> +-- Table structure for table `FilesUsedInDiacritics`
> +--
> +
> +CREATE TABLE IF NOT EXISTS `FilesUsedInDiacritics` (
> + `fileId` int(11) NOT NULL,
> + `createDate` int(11) DEFAULT NULL,
> + `modDate` int(11) DEFAULT NULL
> +) ENGINE=InnoDB DEFAULT CHARSET=utf8;
> +
> +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
> +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
> +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
> _______________________________________________
> Dev mailing list
> Dev at dexonline.ro
> http://list.dexonline.ro/listinfo/dev
More information about the Dev mailing list