[dev] [commit] r1007 - phplib/models wwwbase wwwbase/Crawler

automailer at dexonline.ro automailer at dexonline.ro
Wed Sep 25 21:31:23 EEST 2013


Author: alinu
Date: Wed Sep 25 21:31:23 2013
New Revision: 1007

Log:
CrawledPage, uitasem sa aduc ultima versiune, uitasem un ->find_one() la ultima functie
AbstractCrawler.php - eligeableUrl este eligibleUrl
DiacriticsBuilder.php si diacritice.php - schimbat modul de adaugare in baza de date pentru imbunatatirea vitezei: reverse(before)
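exemplu pentru cuvantul "sumar" (before, middle, after): inainte se salva **sum a r****, acum se salveaza mus** a r**** (doar partea "before" este inversata)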

Modified:
   phplib/models/CrawledPage.php
   wwwbase/Crawler/AbstractCrawler.php
   wwwbase/Crawler/DiacriticsBuilder.php
   wwwbase/diacritice.php

Modified: phplib/models/CrawledPage.php
==============================================================================
--- phplib/models/CrawledPage.php	Wed Sep 25 09:58:45 2013	(r1006)
+++ phplib/models/CrawledPage.php	Wed Sep 25 21:31:23 2013	(r1007)
@@ -38,7 +38,7 @@
 
 	 function getNextDiacriticsFile() {
 
-	 	return Model::factory(self::$_table)->raw_query("select id, parsedTextPath from CrawledPage where id not in (select fileId from FilesUsedInDiacritics);");
+	 	return Model::factory(self::$_table)->raw_query("select id, parsedTextPath from CrawledPage where id not in (select fileId from FilesUsedInDiacritics);")->find_one();
 	 }
 	
 

Modified: wwwbase/Crawler/AbstractCrawler.php
==============================================================================
--- wwwbase/Crawler/AbstractCrawler.php	Wed Sep 25 09:58:45 2013	(r1006)
+++ wwwbase/Crawler/AbstractCrawler.php	Wed Sep 25 21:31:23 2013	(r1007)
@@ -211,7 +211,7 @@
 		return str_get_html($buffer);
     }
 
-    function eligeableUrl($url) {
+    function eligibleUrl($url) {
 
     	$resource = parse_utf8_url($url);
     	$pathInfo = pathinfo($resource['path']);
@@ -237,7 +237,7 @@
 	function processLink($url) {
 
 
-		if (!$this->eligeableUrl($url)) {
+		if (!$this->eligibleUrl($url)) {
 
 			return;
 		}

Modified: wwwbase/Crawler/DiacriticsBuilder.php
==============================================================================
--- wwwbase/Crawler/DiacriticsBuilder.php	Wed Sep 25 09:58:45 2013	(r1006)
+++ wwwbase/Crawler/DiacriticsBuilder.php	Wed Sep 25 21:31:23 2013	(r1007)
@@ -58,13 +58,12 @@
 
 			$this->currentFolder = $folder;
 			$this->localCount = 0;
-			$this->folderCount = iterator_count(new DirectoryIterator(substr($crawledPage->parsedTextPath,0,strrpos($crawledPage->parsedTextPath, '/'))));
 		}
 
 		$this->localCount ++;		
 		$this->globalCount ++;
 
- 		crawlerLog("Total(this run)::$this->globalCount, now processing $folder $this->localCount/".$this->folderCount);
+		crawlerLog("Total(this run)::$this->globalCount, now processing $folder");
  	}
 
 	/* 
@@ -77,6 +76,7 @@
 
 			$crawledPage = CrawledPage::getNextDiacriticsFile();
 			
+
 			if ($crawledPage == null) {
 
 				return null;
@@ -142,28 +142,31 @@
 		for ($i = 0; $i < self::$paddingNumber; $i++) {
 			
 			if ($infOffset < 0) {
-				$before = self::$paddingChar . $before;
+				//$before = self::$paddingChar . $before;
+				$before = $before . self::$paddingChar;
 			}
 			else {
 				if (!$infPadding) {
-					$infCh = StringUtil::getCharAt($this->text, $infOffset);
+					$infCh = StringUtil::getCharAt($this->file, $infOffset);
 					$infPadding = self::isSeparator($infCh);
 				}
 				if ($infPadding) {
-					$before = self::$paddingChar . $before;
+					//$before = self::$paddingChar . $before;
+					$before = $before . self::$paddingChar;
 				}
 				else {
-					$before = $infCh . $before;
+					//$before = $infCh . $before;
+					$before = $before . $infCh;
 					$infOffset --;
 				}
-			}
-			
-			if ($supOffset > $this->textEndOffset) {
+			}	
+
+			if ($supOffset > $this->fileEndOffset) {
 				$after = $after . self::$paddingChar;
 			}
 			else {
 				if (!$supPadding) {
-					$supCh = StringUtil::getCharAt($this->text, $supOffset);
+					$supCh = StringUtil::getCharAt($this->file, $supOffset);
 					$supPadding = self::isSeparator($supCh);
 				}
 				if ($supPadding) {
@@ -188,7 +191,7 @@
 		$this->fileEndOffset = mb_strlen($file) - 1;
 
 		while(($offset = $this->getNextOffset()) !== null) {
-			
+
 			$this->leftAndRightPadding($offset);
 		}
 	}

Modified: wwwbase/diacritice.php
==============================================================================
--- wwwbase/diacritice.php	Wed Sep 25 09:58:45 2013	(r1006)
+++ wwwbase/diacritice.php	Wed Sep 25 21:31:23 2013	(r1007)
@@ -138,7 +138,8 @@
 		for ($i = 0; $i < self::$paddingNumber; $i++) {
 			
 			if ($infOffset < 0) {
-				$before = self::$paddingChar . $before;
+				//$before = self::$paddingChar . $before;
+				$before = $before . self::$paddingChar;
 			}
 			else {
 				if (!$infPadding) {
@@ -146,15 +147,15 @@
 					$infPadding = self::isSeparator($infCh);
 				}
 				if ($infPadding) {
-					$before = self::$paddingChar . $before;
+					//$before = self::$paddingChar . $before;
+					$before = $before . self::$paddingChar;
 				}
 				else {
-					$before = $infCh . $before;
+					//$before = $infCh . $before;
+					$before = $before . $infCh;
 					$infOffset --;
 				}
-			}
-			
-			
+			}	
 
 			if ($supOffset > $this->textEndOffset) {
 				$after = $after . self::$paddingChar;
@@ -174,6 +175,7 @@
 			}
 		}
 
+
 		crawlerLog("IN TEXT " . $before .'|' . $middle . '|' . $after);
 
 		$tableObj = Diacritics::entryExists($before, $middle, $after);


More information about the Dev mailing list