[dev] [commit] r1008 - wwwbase/Crawler
automailer at dexonline.ro
automailer at dexonline.ro
Wed Sep 25 21:33:00 EEST 2013
Author: alinu
Date: Wed Sep 25 21:33:00 2013
New Revision: 1008
Log:
Sectiunea de crawler de la dex.conf
Modified:
wwwbase/Crawler/crawler_dex.conf
Modified: wwwbase/Crawler/crawler_dex.conf
==============================================================================
--- wwwbase/Crawler/crawler_dex.conf Wed Sep 25 21:31:23 2013 (r1007)
+++ wwwbase/Crawler/crawler_dex.conf Wed Sep 25 21:33:00 2013 (r1008)
@@ -8,7 +8,7 @@
parsed_text_path=ParsedText/
;waiting time between getting pages
-t_wait=10
+t_wait=15
;if this is true, then the application will
@@ -22,10 +22,13 @@
;outputs messages to the screen
;values are true and false
log2screen=true
+
;outputs messages to the file
;specified by crawler_log
log2file=true
new_line=PHP_EOL
+;afiseaza apelurile functiilor in log
+function_trace=false
;the most probable directory index file
@@ -38,5 +41,18 @@
user_agent_location=/var/www/CrawlerData/user_agent
+diacritics_buffer_limit=2048
+
;diacritics list
diacritics=ăâîșț
+
+padding_char=*
+
+;non-diacritics list
+non_lower_diacritics=aist
+non_upper_diacritics=AIST
+
+;left and right length
+;e.g. for cireșarii cire-s-arii
+;the padding_length is 5
+diacritics_padding_length=5
More information about the Dev
mailing list