[dev] [commit] r984 - phplib/models wwwbase/Crawler
automailer at dexonline.ro
automailer at dexonline.ro
Fri Sep 13 19:06:02 EEST 2013
Author: alinu
Date: Fri Sep 13 19:06:02 2013
New Revision: 984
Log:
Aceasta versiune are models/FilesUsedInDiacritics.php si tabelele pentru diacritice.
Nu este importanta revizuirea ei deoarece am uploadat-o pentru a nu o pierde.
Added:
phplib/models/FilesUsedInDiacritics.php
wwwbase/Crawler/DiacriticsTables.sql
Modified:
phplib/models/Diacritics.php
wwwbase/Crawler/DiacriticsBuilder.php
Modified: phplib/models/Diacritics.php
==============================================================================
--- phplib/models/Diacritics.php Thu Sep 12 09:03:25 2013 (r983)
+++ phplib/models/Diacritics.php Fri Sep 13 19:06:02 2013 (r984)
@@ -36,23 +36,42 @@
//inlocuieste diactriticele
private static function stripDiacritics($str) {
- return $str;
-
-
$strippedStr = '';
- $strArray = str_split($str, 1);
- foreach($strArray as $ch) {
+ $currOffset = 0;
+ $finalOffset = strlen($str) - 1;
+
+ while($currOffset <= $finalOffset) {
+
+ $ch = '';
+ if ($currOffset == $finalOffset) {
+
+ $ch = substr($str, $currOffset, 1);
+ $currOffset ++;
+ }
+ else {
+
+ $ch = substr($str, $currOffset, 2);
+ if (strstr('ăâîșț', $ch)) {
+
+ $currOffset += 2;
+ }
+ else {
+
+ $ch = substr($str, $currOffset, 1);
+ $currOffset ++;
+ }
+ }
$strippedStr .= self::replaceDiacritic($ch);
}
-
return $strippedStr;
}
- public static function save2Db($before, $middle, $after) {
+
+ public function insertRow($before, $middle, $after, $diacritic) {
try {
@@ -60,9 +79,9 @@
$tableObj->create();
- $tableObj->before = self::stripDiacritics($before);
- $tableObj->middle = self::stripDiacritics($middle);
- $tableObj->after = self::stripDiacritics($after);
+ $tableObj->before = $before;
+ $tableObj->middle = $middle;
+ $tableObj->after = $after;
$tableObj->defaultForm = '0';
@@ -90,6 +109,49 @@
logException($ex);
}
}
+
+
+
+
+ /**
+  * Placeholder for updating the row of a context that is already stored.
+  *
+  * Not implemented yet: nothing is written and false is always returned.
+  * $diacritic is optional so three-argument calls keep working, while the
+  * signature now accepts the fourth argument that save2Db() passes and that
+  * insertRow() also takes.
+  *
+  * @param string $before context string stored in the `before` column
+  * @param string $middle context string stored in the `middle` column
+  * @param string $after context string stored in the `after` column
+  * @param string|null $diacritic the value of $middle before save2Db() strips it
+  * @return bool always false
+  */
+ public static function updateRow($before, $middle, $after, $diacritic = null) {
+   return false;
+ }
+
+
+ /**
+  * Tells whether the table already holds a row for this exact context.
+  *
+  * The lookup uses the ORM's where() filters, so the three values are bound
+  * as query parameters instead of being pasted into the SQL text, and the
+  * table and column names are supplied to the ORM rather than interpolated
+  * into a string.
+  *
+  * NOTE(review): updateRow() is still a stub, so once this method reports
+  * true, save2Db() stores nothing for the repeated context -- confirm that
+  * is acceptable or implement updateRow() first.
+  *
+  * @param string $before value compared against the `before` column
+  * @param string $middle value compared against the `middle` column
+  * @param string $after value compared against the `after` column
+  * @return bool true when a matching row is found, false otherwise
+  */
+ public static function entryExists($before, $middle, $after) {
+   $foundEntry = Model::factory(self::$_table)
+     ->where('before', $before)
+     ->where('middle', $middle)
+     ->where('after', $after)
+     ->find_one();
+
+   // find_one() yields false when no row matches.
+   return (bool) $foundEntry;
+ }
+
+
+ /**
+  * Stores one (before, middle, after) context: a row is inserted when
+  * entryExists() reports no match, otherwise the work goes to updateRow().
+  *
+  * The unstripped $middle is kept as $diacritic and passed along; all three
+  * context strings go through stripDiacritics() before the lookup.
+  *
+  * @param string $before context string for the `before` column
+  * @param string $middle context string for the `middle` column
+  * @param string $after context string for the `after` column
+  */
+ public static function save2Db($before, $middle, $after) {
+   // Keep the original value before it is stripped. A plain assignment is
+   // used because substr('', 0) returns false on PHP versions before 8.
+   $diacritic = $middle;
+
+   $before = self::stripDiacritics($before);
+   $middle = self::stripDiacritics($middle);
+   $after = self::stripDiacritics($after);
+
+   if (self::entryExists($before, $middle, $after)) {
+     self::updateRow($before, $middle, $after, $diacritic);
+   }
+   else {
+     // insertRow() is declared as an instance method, so call it on an object
+     // instead of statically.
+     // NOTE(review): assumes the model can be built without constructor
+     // arguments, as the ORM does when it creates model objects -- confirm.
+     $row = new self();
+     $row->insertRow($before, $middle, $after, $diacritic);
+   }
+ }
}
?>
Added: phplib/models/FilesUsedInDiacritics.php
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ phplib/models/FilesUsedInDiacritics.php Fri Sep 13 19:06:02 2013 (r984)
@@ -0,0 +1,23 @@
+<?php
+
+
+/**
+ * Model for the FilesUsedInDiacritics table; each row holds one file id
+ * passed to save2Db().
+ */
+class FilesUsedInDiacritics extends BaseObject implements DatedObject {
+
+  public static $_table = 'FilesUsedInDiacritics';
+
+  /**
+   * Inserts one row holding the given file id.
+   *
+   * Any exception raised while saving is handed to logException() and not
+   * rethrown.
+   *
+   * @param int $fileId value stored in the fileId column
+   */
+  public static function save2Db($fileId) {
+    try {
+      // create() returns the new model object. Set the field and save on that
+      // object, not on the query wrapper returned by factory(); otherwise the
+      // model class's own save() is skipped.
+      // NOTE(review): presumably BaseObject::save() maintains createDate and
+      // modDate for DatedObject models -- confirm.
+      $row = Model::factory(self::$_table)->create();
+      $row->fileId = $fileId;
+      $row->save();
+    }
+    catch (Exception $ex) {
+      logException($ex);
+    }
+  }
+}
+
+?>
\ No newline at end of file
Modified: wwwbase/Crawler/DiacriticsBuilder.php
==============================================================================
--- wwwbase/Crawler/DiacriticsBuilder.php Thu Sep 12 09:03:25 2013 (r983)
+++ wwwbase/Crawler/DiacriticsBuilder.php Fri Sep 13 19:06:02 2013 (r984)
@@ -29,6 +29,10 @@
private static $diacritics;
private static $nonDiacritics;
private static $paddingNumber;
+ private static $paddingChar;
+ private $globalCount;
+ private $localCount;
+ private $currentDir;
/*
* initialises instance variables
*/
@@ -38,6 +42,9 @@
self::$diacritics = pref_getSectionPreference("crawler", "diacritics");
self::$nonDiacritics = pref_getSectionPreference("crawler", "non_diacritics");
self::$paddingNumber = pref_getSectionPreference('crawler', 'diacritics_padding_length');
+ self::$paddingChar = pref_getSectionPreference('crawler', 'padding_char');
+
+ $this->globalCount = 0;
}
/*
@@ -149,7 +156,7 @@
if ($infPadding) {
- $before = '*' . $before;
+ $before = self::$paddingChar . $before;
}
else {
@@ -160,7 +167,7 @@
if ($this->isSeparator($infCh)) {
$infPadding = true;
- $before = '*' . $before;
+ $before = self::$paddingChar . $before;
}
else {
@@ -181,7 +188,7 @@
if ($this->isSeparator($infCh)) {
$infPadding = true;
- $before = '*' . $before;
+ $before = self::$paddingChar . $before;
}
else {
@@ -210,7 +217,7 @@
if ($supPadding) {
- $after .= '*';
+ $after .= self::$paddingChar;
}
else {
@@ -221,7 +228,7 @@
if ($this->isSeparator($infCh)) {
$supPadding = true;
- $after = '*' . $after;
+ $after = self::$paddingChar . $after;
}
else {
@@ -242,7 +249,7 @@
if ($this->isSeparator($supCh)) {
$supPadding = true;
- $after .= '*';
+ $after .= self::$paddingChar;
}
else {
@@ -269,7 +276,7 @@
if ($inferiorOffset < 0) {
- $before = '*' . $before;
+ $before = self::$paddingChar . $before;
}
else {
@@ -280,12 +287,12 @@
$inferiorSeparator = $this->isSeparator($ch);
}
- $before = ($inferiorSeparator ? '*' : $ch) . $before;
+ $before = ($inferiorSeparator ? self::$paddingChar : $ch) . $before;
}
if ($superiorOffset > $this->fileEndOffset) {
- $after .= '*';
+ $after .= self::$paddingChar;
}
else {
@@ -297,7 +304,7 @@
$superiorSeparator = $this->isSeparator($ch);
}
- $after .= ($superiorSeparator ? '*' : $ch);
+ $after .= ($superiorSeparator ? self::$paddingChar : $ch);
}
}
Added: wwwbase/Crawler/DiacriticsTables.sql
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ wwwbase/Crawler/DiacriticsTables.sql Fri Sep 13 19:06:02 2013 (r984)
@@ -0,0 +1,32 @@
+
+-- Save the connection character-set settings, so the SET statements at the
+-- end of this script restore real values instead of unset (NULL) variables.
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+
+--
+-- Table structure for table `Diacritics`
+--
+-- One row per (before, middle, after) context string triple.
+-- NOTE(review): defaultForm / curvedForm / circumflexForm look like per-form
+-- counters -- confirm against phplib/models/Diacritics.php.
+--
+
+CREATE TABLE IF NOT EXISTS `Diacritics` (
+ `id` bigint(20) NOT NULL AUTO_INCREMENT,
+ `before` varchar(10) NOT NULL,
+ `middle` varchar(2) NOT NULL,
+ `after` varchar(10) NOT NULL,
+ `defaultForm` int(11) NOT NULL,
+ `curvedForm` int(11) NOT NULL,
+ `circumflexForm` int(11) NOT NULL,
+ `createDate` int(11) DEFAULT NULL,
+ `modDate` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
+
+
+--
+-- Table structure for table `FilesUsedInDiacritics`
+--
+-- One row per file id saved by FilesUsedInDiacritics::save2Db().
+-- NOTE(review): no primary key or index is declared on this table.
+--
+
+CREATE TABLE IF NOT EXISTS `FilesUsedInDiacritics` (
+ `fileId` int(11) NOT NULL,
+ `createDate` int(11) DEFAULT NULL,
+ `modDate` int(11) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+-- Restore the settings saved at the top of the script.
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
More information about the Dev
mailing list