vagina (805B)
#!/bin/bash
# thx tsakos! :)
#
# Prune dead links from a directory of URL files.
#
# Every url<N>.txt inside $URL_DB is read in numeric order, the directory is
# archived to $URL_DB.tgz and emptied, and each URL that still answers is
# written back as url0.txt, url1.txt, ... (one URL per file, no gaps).
# Run it from the directory that contains $URL_DB.

URL_DB=urls # directory that contains the URLs
CNT=0       # link counter: index of the next url<N>.txt to be written

#######################################
# Report whether a URL answers an HTTP HEAD request successfully.
# Arguments: $1 - URL to probe
# Returns:   0 when the status code is 2xx or 3xx;
#            1 for every other outcome (4xx, 5xx, no response at all).
#######################################
check_avail() {
  local status

  # Headers are discarded; --write-out prints only the numeric status code.
  # A failed transfer yields "000" (or nothing when curl itself cannot run),
  # which lands in the catch-all arm below instead of counting as a success.
  status=$(curl -Is -o /dev/null -w '%{http_code}' -- "$1")

  case "$status" in
    2[0-9][0-9] | 3[0-9][0-9])
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}

#######################################
# Back up, empty and rebuild $URL_DB with the URLs that are still reachable.
# Globals:   URL_DB (read), CNT (read and incremented per kept URL)
# Outputs:   one status line per URL on stdout; errors on stderr
# Returns:   0 on success, 1 when the directory is missing, a URL file is
#            unreadable, or the backup could not be created (nothing is
#            deleted in any of those cases).
#######################################
main() {
  # Same loose sanity check as before: in effect it requires a '.' in the URL.
  local -r url_re='([a-z0-9]*\.)?[a-z]*\.[a-z]*'
  local -a ids=()
  local -a urls=()
  local file id src line url

  if [[ ! -d "$URL_DB" ]]; then
    printf 'error: URL directory not found: %s\n' "$URL_DB" >&2
    return 1
  fi

  # The directory is wiped further down, so never continue without a backup.
  if ! tar -czf "${URL_DB}.tgz" "$URL_DB"; then
    printf 'error: could not create %s.tgz; nothing was changed\n' "$URL_DB" >&2
    return 1
  fi

  # Collect the numeric part of every url<N>.txt. An unmatched glob stays
  # literal and is rejected by the digits-only test.
  for file in "$URL_DB"/url*.txt; do
    id=${file##*/url}
    id=${id%.txt}
    if [[ "$id" =~ ^[0-9]+$ ]]; then
      ids+=("$id")
    fi
  done

  if (( ${#ids[@]} == 0 )); then
    printf 'no url<N>.txt files found in %s; nothing to do\n' "$URL_DB" >&2
    return 0
  fi

  # Load all URLs into memory before anything is deleted. Reading line by
  # line keeps the last line of a file that lacks a trailing newline from
  # being glued to the first line of the next file.
  while IFS= read -r id; do
    src="$URL_DB/url$id.txt"
    if [[ ! -r "$src" ]]; then
      printf 'error: cannot read %s; nothing was changed\n' "$src" >&2
      return 1
    fi
    while read -r line || [[ -n "$line" ]]; do
      urls+=("${line//$'\r'/}") # drop carriage returns from CRLF files
    done < "$src"
  done < <(printf '%s\n' "${ids[@]}" | sort -n)

  # ${URL_DB:?} aborts instead of expanding to "/*" if the variable is empty.
  rm -rf -- "${URL_DB:?}"/*

  # Note: the url<N>.txt label in every message is the next output slot,
  # not the file the URL was originally read from.
  for url in "${urls[@]}"; do
    if [[ "$url" =~ $url_re ]]; then
      if check_avail "$url"; then
        printf '%s\n' "url$CNT.txt: OK"
        printf '%s\n' "$url" > "$URL_DB/url$CNT.txt"
        CNT=$((CNT + 1))
      else
        printf '%s\n' "url$CNT.txt: BROKEN"
      fi
    else
      printf '%s\n' "Invalid URL format: url$CNT.txt"
    fi
  done

  return 0
}

main "$@"