Advanced Data Analytics Using Python: With Machine Learning, Deep Learning and NLP Examples (2023)
Create a successful ePaper yourself
Turn your PDF publications into a flip-book with our unique Google-optimized e-Paper software.
Chapter 2
ETL with Python (Structured Data)
elif link[-4:] == 'docx':
try:
email, ph = process_docx_link(link)
spamwriter.writerow([link, ' '.join(email), ' '.join(ph)])
except:
print "error",link
print sys.exc_info()
spamwriter.writerow([link, ' '.join(email), ' '.join(ph)])
elif link[-3:] == 'doc':
try:
email, ph = process_doc_link(link)
spamwriter.writerow([link, ' '.join(email), ' '.join(ph)])
except:
print "error",link
print sys.exc_info()
spamwriter.writerow([link, ' '.join(email), ' '.join(ph)])
else:
try:
html = urllib2.urlopen(link)
email, ph = get_email_ph(BeautifulSoup(html.read()).get_
text(), pdf=False)
spamwriter.writerow([link, ' '.join(email), ' '.join(ph)])
except:
print "error",link
print sys.exc_info()
spamwriter.writerow([link, ' '.join(email), ' '.join(ph)])
except:
pass
print "error",link
print sys.exc_info()
if __name__ == '__main__':
47