kunt

golang IRC bot
git clone git://git.2f30.org/kunt
Log | Files | Refs | LICENSE

vagina (805B)


      1 #!/bin/bash
      2 # thx tsakos! :)
      3 
      4 TMP_FILE=tmp.$$ # a temporary file for storing the URLs
      5 URL_DB=urls # directory that contains the URLs
      6 CNT=0 # link counter
      7 
      8 tar cfz $URL_DB.tgz $URL_DB
      9 
     10 check_avail() {
     11 STATUS=`curl -Is $1 | grep HTTP | cut -d ' ' -f 2`
     12 
     13 case $STATUS in
     14 200)
     15 	return 0
     16 	;;
     17 404)
     18 	return 1
     19 	;;
     20 esac
     21 }
     22 
     23 for i in `ls $URL_DB | grep -o '[0-9]*' | sort -g`
     24 do
     25 	cat $URL_DB/url$i.txt | tr -d '\r' >> $TMP_FILE
     26 done
     27 
     28 rm -rf urls/*
     29 
     30 while read URL
     31 do
     32 	if [ "`echo $URL | grep -Eo '([a-z0-9]*\.)?[a-z]*\.[a-z]*'`" ]
     33 	then
     34 		check_avail $URL
     35 		RETURN_CODE=$?
     36 
     37 		if [ $RETURN_CODE -ne 0 ]
     38 		then
     39 			echo "url$CNT.txt: BROKEN"
     40 		else
     41 			echo "url$CNT.txt: OK"
     42 			echo "$URL" >> $URL_DB/url$CNT.txt
     43 			CNT=$((CNT+1))
     44 		fi
     45 	else
     46 		echo "Invalid URL format: url$CNT.txt"
     47 	fi
     48 
     49 done < $TMP_FILE
     50 
     51 rm $TMP_FILE