#!/bin/sh

# Name of the blacklist file (located in DATA/LISTS) that new entries are
# appended to.
BLACKLIST="url.default.black"
# Space-separated entries that block_domain refuses to black-list.
WHITELIST="de ch net com org ru pl us com.au info"
# Silent mode: SILENT is non-empty when the first argument contains the
# letter "s" (e.g. "-s"); debug messages are suppressed in that case.
# The argument is matched with a quoted case pattern so that it is never
# word-split, glob-expanded or interpreted as an option to echo.
case "${1}" in
	*s*) SILENT="${1}" ;;
	*)   SILENT="" ;;
esac

# Print a progress message prefixed with the script name.
# Nothing is printed when SILENT is non-empty.
#   $1 - message text
debug_msg() {
	# Silent mode requested: stay quiet and report success.
	if test -n "${SILENT}"; then
		return 0
	fi
	echo "${0}: ${1}"
}

# Append a domain to the blacklist unless it is white-listed or already listed.
#   $1 - domain to block
# Globals read:
#   WHITELIST - space-separated entries that are never black-listed
#   BLACKLIST - file name of the blacklist inside DATA/LISTS
# Side effect: appends the line "<domain>/.*" to DATA/LISTS/${BLACKLIST}.
block_domain() {
	# An empty entry would be written as the catch-all pattern "/.*".
	if test -z "${1}"; then
		return 0
	fi

	debug_msg "Processing entry ${1} ..."

	# Compare against whole whitelist words. A regex/substring match would
	# treat e.g. "co" (substring of "com") as white-listed.
	case " ${WHITELIST} " in
		*" ${1} "*)
			debug_msg "Entry ${1} is white-listed!"
			return 0
			;;
	esac

	# Look for exactly the line this function would write. -F disables regex
	# interpretation of the dots, -x requires a whole-line match, so that
	# "ample.com" is not considered covered by "example.com/.*".
	if test -f "DATA/LISTS/${BLACKLIST}" &&
		grep -q -x -F -e "${1}/.*" "DATA/LISTS/${BLACKLIST}"; then
		debug_msg "Domain ${1} is already black-listed."
	else
		echo "${0}: Black-listing domain ${1}..."
		echo "${1}/.*" >> "DATA/LISTS/${BLACKLIST}"
	fi
}

# Rewrite parking.log (in the current directory) so that every URL whose host
# does not start with the label "www" becomes "http://www.<host and path>".
# Lines whose host already starts with "www" are kept unchanged.
#
# The file is processed in a single pass and line order is preserved.
# Entries are compared as plain strings: no entry is ever used as a regular
# expression or expanded as a glob, so rewriting one URL cannot remove
# unrelated lines that merely contain it as a substring.
#
# Uses temp.log in the current directory as scratch file.
# Returns non-zero if the rewrite fails; parking.log is left untouched then.
fix_missing_www() {
	echo "$0: Fixing missing www..."

	# Nothing to fix when no log extract exists.
	test -f parking.log || return 0

	if awk '
		{
			# Everything after the second "/" (same as: cut -d "/" -f 3-).
			# A line without any "/" is taken as a whole.
			rest = $0
			if (index(rest, "/") > 0) {
				for (i = 0; i < 2; i++) {
					pos = index(rest, "/")
					rest = (pos > 0) ? substr(rest, pos + 1) : ""
				}
			}

			# No host part available: keep the line as it is.
			if (rest == "") {
				print
				next
			}

			# First dot-separated label of the host.
			label = rest
			dot = index(label, ".")
			if (dot > 0) {
				label = substr(label, 1, dot - 1)
			}

			if (label == "www") {
				print
			} else {
				print "http://www." rest
			}
		}
	' parking.log > temp.log; then
		mv temp.log parking.log
	else
		rm -f temp.log
		return 1
	fi
}

# All paths below (DATA/LOG, DATA/LISTS, parking.log) are relative to the
# directory containing this script. Abort if it cannot be entered, otherwise
# parking.log would be created and deleted in an unrelated directory.
cd "$(dirname "${0}")" || {
	echo "${0}: Cannot change to the script directory." >&2
	exit 1
}
echo "${0}: Analysing logfiles..."

# Collect candidate URLs into parking.log. Each group selects log lines that
# contain "http://" plus one URL pattern, then takes the space-separated
# fields 10, 11 and 12 and keeps those containing "http". For field 12
# everything up to the first ":" is dropped as well.

# Pattern: /parking.php?ses=
grep "http://" DATA/LOG/yacy*.log | grep "/parking.php?ses=" | cut -d " " -f 10 | grep http > parking.log
grep "http://" DATA/LOG/yacy*.log | grep "/parking.php?ses=" | cut -d " " -f 11 | grep http >> parking.log
grep "http://" DATA/LOG/yacy*.log | grep "/parking.php?ses=" | cut -d " " -f 12 | grep http | cut -d ":" -f 2- >> parking.log
# Pattern: /info/ together with .html?ses=
grep "http://" DATA/LOG/yacy*.log | grep "/info/" | grep "\.html?ses=" | cut -d " " -f 10 | grep http >> parking.log
grep "http://" DATA/LOG/yacy*.log | grep "/info/" | grep "\.html?ses=" | cut -d " " -f 11 | grep http >> parking.log
grep "http://" DATA/LOG/yacy*.log | grep "/info/" | grep "\.html?ses=" | cut -d " " -f 12 | grep http | cut -d ":" -f 2- >> parking.log
# Pattern: /?epl=0
grep "http://" DATA/LOG/yacy*.log | grep "/?epl=0" | cut -d " " -f 10 | grep http >> parking.log
grep "http://" DATA/LOG/yacy*.log | grep "/?epl=0" | cut -d " " -f 11 | grep http >> parking.log
grep "http://" DATA/LOG/yacy*.log | grep "/?epl=0" | cut -d " " -f 12 | grep http | cut -d ":" -f 2- >> parking.log

fix_missing_www

# Take the host part of every URL (third "/"-separated field), keep its
# dot-separated labels 2 to 4 (i.e. drop the leading label) and de-duplicate.
# "sort -u" is the portable spelling of GNU's "sort --unique".
LIST=$(cut -d "/" -f 3 parking.log | cut -d "." -f 2-4 | sort -u)
rm -f parking.log

# Feed the entries line by line through a here-document so that they are
# neither word-split nor glob-expanded by the shell.
while IFS= read -r entry; do
	# An empty LIST still yields one empty line; skip it.
	test -n "${entry}" || continue
	block_domain "${entry}"
done <<EOF
${LIST}
EOF
echo "${0}: All done."

