#!/bin/bash
# Fetch HD trailer URLs from Apple's trailer site and build an HTML index page (hdtrailers.html).

# Print the human-readable size of the resource at URL $1.
#
# Runs `wget --spider` (existence check, nothing is downloaded) with its log
# written to ./sizelog, then prints the text between the parentheses of the
# last "Length:" line of that log, e.g. "12M" for
#   Length: 12345678 (12M) [video/quicktime]
#
# Arguments: $1 - URL to probe; may be empty.
# Outputs:   the size on stdout; an empty line when the URL is empty or the
#            log contains no "Length:" line.
# Files:     overwrites ./sizelog in the current directory.
getsizefromurl ()
{
  local url=$1
  local size

  # Start from an empty log so a skipped or failed probe can never report
  # the size left behind by the previous call.
  : > sizelog

  # Without a URL wget only prints a usage error, so skip the call.
  if [ -n "$url" ]; then
    wget --spider -o sizelog "$url"
  fi

  # Only the last "Length:" line is used, so the result is a single value
  # even if the log reports more than one response.
  size=$(grep Length sizelog | tail -n 1 | cut -d'(' -f2 | cut -d')' -f1)
  printf '%s\n' "$size"
}

# Emit the trailer index as a complete HTML document on stdout.
#
# Arguments: $1 - path to a comma-separated file whose rows are
#                 title,url480,size480,url720,size720,url1080,size1080
# Outputs:   the HTML page: a header row, one table row per CSV row (each
#            size is the link text for its URL), a final "All" row linking
#            urls480.list / urls720.list / urls1080.list, and a
#            "Last updated on" line carrying the current `date -R`.
createpage ()
{
  local csv=$1

  # Static page head and table header.
  cat <<'EOF'
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="content-type">
<link rel="stylesheet" type="text/css" href="style.css">
<title>HD Trailers</title>
</head>
<body>
<table border="0" width="100%" cellpadding="2" cellspacing="2">
<tr><td width="70%"><b>Title</b></td><td><b>480p</b></td><td><b>720p</b></td><td><b>1080p</b></td></tr>
EOF

  # One table row per CSV record. The path is quoted so a file name with
  # whitespace or glob characters is passed to gawk as a single argument.
  gawk -F',' '{print "<tr><td width=\"70%\">" $1 "</td><td><a href=\"" $2 "\">" $3 "</a></td><td><a href=\"" $4 "\">" $5 "</a></td><td><a href=\"" $6 "\">" $7 "</a></td></tr>"}' "$csv"

  # Footer row pointing at the per-resolution URL lists.
  cat <<'EOF'
<tr><td width="70%"></td><td><a href="urls480.list"><b>All</b></a></td><td><a href="urls720.list"><b>All</b></a></td><td><a href="urls1080.list"><b>All</b></a></td></tr>
</table>
<br>
EOF

  printf 'Last updated on: %s\n' "$(date -R)"

  cat <<'EOF'
</body>
</html>
EOF
}

# Recursively fetch the pages below http://www.apple.com/trailers
# (-r recursive, -np never ascend to the parent directory, -D / -I restrict
# the crawl to www.apple.com and the "trailers" directory, -c continue
# partially downloaded files). wget's own messages go to ./wgetlog.
wget -c -np -r -D www.apple.com -I trailers -o wgetlog http://www.apple.com/trailers

# Drop the intermediate files of a previous run. -f keeps this silent when
# they do not exist, which is the normal case on a first run.
rm -f hdtrailer.csv additional.list

# Print the first trailer URL of resolution $2 (480, 720 or 1080) found in
# page $1. Any further URLs of that resolution are appended to
# ./additional.list, one per line.
#
# URLs are taken from the 10th comma-separated field of every line that
# mentions QT_WriteOBJECT_XHTML, with the first two single quotes removed.
# The resolution is rewritten to its "h" variant (480 -> h480); a URL that
# already carried the "h" ends up with "hh", which is collapsed to one "h".
pick_movurl ()
{
  local page=$1 res=$2
  local urls

  urls=$(grep QT_WriteOBJECT_XHTML "$page" \
    | gawk -F',' '{print $10}' \
    | sed "s*'**; s*'**" \
    | grep "$res" \
    | sed "s*$res*h$res*" \
    | tr -s ' \t' '\n' \
    | sed '/^$/d; s*hh*h*')

  # No URL of this resolution on the page: print nothing.
  [ -n "$urls" ] || return 0

  # Record the extras before narrowing down to the first URL, so the list
  # receives the skipped URLs instead of the first one again.
  printf '%s\n' "$urls" | sed '1d' >> additional.list
  printf '%s\n' "$urls" | sed -n '1p'
}

# Walk every fetched ".../hd/index.html" page. The path list is read line by
# line on fd 3, so paths containing spaces stay intact and the commands in
# the loop body keep their own stdin.
while IFS= read -r page <&3
do
  # Title: on the last line mentioning "h1", the text between the first '>'
  # and the following '<'. Whitespace is normalised and commas become the
  # HTML entity &#44; so a title containing a comma cannot shift the
  # comma-separated columns written below.
  title=$(grep h1 "$page" | cut -d'>' -f2 | cut -d'<' -f1 | tail -n 1 \
    | sed 's/[[:space:]][[:space:]]*/ /g; s/^ //; s/ $//; s/,/\&#44;/g')

  movurl480=$(pick_movurl "$page" 480)
  movurl720=$(pick_movurl "$page" 720)
  movurl1080=$(pick_movurl "$page" 1080)

  size480=$(getsizefromurl "$movurl480")
  size720=$(getsizefromurl "$movurl720")
  size1080=$(getsizefromurl "$movurl1080")

  # Row layout: title,url480,size480,url720,size720,url1080,size1080
  printf '%s,%s,%s,%s,%s,%s,%s\n' \
    "$title" \
    "$movurl480" "$size480" \
    "$movurl720" "$size720" \
    "$movurl1080" "$size1080" >> hdtrailer.csv
done 3< <(find ./ -type f -print | grep 'hd/index.html')

# skipped.list: the extra URLs from additional.list, one per line. Every
# space is turned into a line break (not just the first one on each line)
# and empty lines are dropped.
if [ -f additional.list ]; then
  tr -s ' ' '\n' < additional.list | sed '/^$/d' > skipped.list
else
  : > skipped.list
fi

# Drop CSV rows that mention "wget". The filtered copy goes to a scratch
# file first: redirecting straight back into hdtrailer.csv would truncate
# the file while it is still being read.
if [ -f hdtrailer.csv ]; then
  grep -v 'wget' hdtrailer.csv > hdtrailer.csv.tmp
  mv -f hdtrailer.csv.tmp hdtrailer.csv
else
  : > hdtrailer.csv
fi

# Build one URL list per resolution: the matching CSV column (2, 4 and 6
# for 480, 720 and 1080) followed by the skipped URLs that mention that
# resolution.
column=2
for resolution in 480 720 1080
do
  gawk -F',' -v col="$column" '{print $col}' hdtrailer.csv > "urls${resolution}.list"
  grep "$resolution" < skipped.list >> "urls${resolution}.list"
  column=$((column + 2))
done

# Sort the rows case-insensitively, then render them as the HTML index.
sort --ignore-case hdtrailer.csv > hdtrailers.csv
createpage hdtrailers.csv > hdtrailers.html

# Remove the downloaded page tree and every intermediate file. -f keeps the
# cleanup quiet when a file was never created (for example additional.list
# when no page listed more than one URL per resolution).
rm -rf www.apple.com/
rm -f wgetlog skipped.list additional.list hdtrailer.csv hdtrailers.csv sizelog
