[dev] [commit] r1008 - wwwbase/Crawler

automailer at dexonline.ro automailer at dexonline.ro
Wed Sep 25 21:33:00 EEST 2013


Author: alinu
Date: Wed Sep 25 21:33:00 2013
New Revision: 1008

Log:
Sectiunea de crawler de la dex.conf

Modified:
   wwwbase/Crawler/crawler_dex.conf

Modified: wwwbase/Crawler/crawler_dex.conf
==============================================================================
--- wwwbase/Crawler/crawler_dex.conf	Wed Sep 25 21:31:23 2013	(r1007)
+++ wwwbase/Crawler/crawler_dex.conf	Wed Sep 25 21:33:00 2013	(r1008)
@@ -8,7 +8,7 @@
 parsed_text_path=ParsedText/
 
 ;waiting time between getting pages
-t_wait=10
+t_wait=15
 
 
 ;if this is true, then the application will
@@ -22,10 +22,13 @@
 ;outputs messages to the screen
 ;values are true and false
 log2screen=true
+
 ;outputs messages to the file
 ;specified by crawler_log
 log2file=true
 new_line=PHP_EOL
+;afiseaza apelurile functiilor in log
+function_trace=false
 
 
 ;the most probable directory index file
@@ -38,5 +41,18 @@
 user_agent_location=/var/www/CrawlerData/user_agent
 
 
+diacritics_buffer_limit=2048
+
 ;diacritics list
 diacritics=ăâîșț
+
+padding_char=*
+
+;non-diacritics list
+non_lower_diacritics=aist
+non_upper_diacritics=AIST
+
+;left and right length
+;e.g. for cireșarii cire-s-arii
+;the padding_length is 5
+diacritics_padding_length=5


More information about the Dev mailing list