Zensursula und negative Verantwortungsattribution - Netzpolitik
Zensursula und negative Verantwortungsattribution - Netzpolitik
Zensursula und negative Verantwortungsattribution - Netzpolitik
Erfolgreiche ePaper selbst erstellen
Machen Sie aus Ihren PDF Publikationen ein blätterbares Flipbook mit unserer einzigartigen Google optimierten e-Paper Software.
Anhang 1 – Der Topsy-Crawler
### methods
# returns the request url corresponding to the defined parameters
def get_request(start, end, term, page, type, perpage):
    """Return the Topsy Otter search URL for the given query parameters.

    Args:
        start: lower bound of the time window, sent as ``mintime``.
        end: upper bound of the time window, sent as ``maxtime``.
        term: search term, sent as ``q``. It is inserted verbatim, so it
            must be a string and is not URL-encoded here.
        page: result page number, sent as ``page``.
        type: value of the ``type`` query parameter; must be a string.
            (The name shadows the builtin but is kept for keyword callers.)
        perpage: number of results per page, sent as ``perpage``.

    Returns:
        The complete request URL as a string.
    """
    # Use the ``term`` parameter: the earlier code read a name ``searchterm``
    # that is not defined in this function and left ``term`` unused.
    # The parentheses make the multi-line concatenation a single expression.
    return (
        'http://otter.topsy.com/search.json?q=' + term
        + '&sort_method=date&perpage=' + str(perpage)
        + '&page=' + str(page)
        + '&mintime=' + str(start)
        + '&maxtime=' + str(end)
        + '&type=' + type
    )
# returns the request url for the request limit
def get_request_limit():
    """Return the URL of the Topsy Otter ``credit.json`` endpoint.

    Returns:
        The request URL as a string; it takes no query parameters.
    """
    return 'http://otter.topsy.com/credit.json'
# prints information about the request and the response
def print_response_information(response):
    """Print the request parameters and the response metadata of a reply.

    Args:
        response: mapping with a ``'request'`` entry (which holds ``'url'``
            and ``'parameters'``) and a ``'response'`` entry.

    The ``maxtime`` and ``mintime`` parameters are converted with
    ``float`` and rendered through ``to_timestamp``; every other parameter
    is printed as is. Of the response entries, ``'list'`` is skipped.
    """
    request = response['request']
    parameters = request['parameters']

    # print('...') with a single argument behaves the same on Python 2 and 3.
    print('')
    print('Request Information')
    print('url: ' + request['url'])
    for key in parameters:
        if key in ('maxtime', 'mintime'):
            print(key + ': ' + to_timestamp(float(parameters[key])))
        else:
            # No str() here on purpose: on Python 2 it would raise for
            # unicode values containing non-ASCII characters.
            print(key + ': ' + parameters[key])
    print('')

    print('Response Information')
    reply = response['response']
    for key in reply:
        if key != 'list':
            print(key + ': ' + str(reply[key]))
    print('')
# prints information about the request limit
def print_request_limit_information(response):
    """Print the request-limit figures contained in a reply.

    Args:
        response: mapping whose ``'response'`` entry holds the keys
            ``'reset'``, ``'limit'``, ``'refresh_in_secs'`` and
            ``'remaining'``.

    ``'reset'`` is rendered through ``to_timestamp``; the other values are
    converted with ``str``.
    """
    limits = response['response']

    # print('...') with a single argument behaves the same on Python 2 and 3.
    print('Request Limit Information')
    print('last reset: ' + to_timestamp(limits['reset']))
    print('limit: ' + str(limits['limit']))
    print('refresh_in_secs: ' + str(limits['refresh_in_secs']))
    print('remaining requests: ' + str(limits['remaining']))
    print('')
# executes the given query and catches exceptions
def execute_query(query):
    """Request ``query`` and return the decoded JSON body.

    Args:
        query: URL passed to ``urllib2.urlopen``.

    Returns:
        The object produced by ``json.load`` on the response.

    Raises:
        Exception: any error raised while opening the URL or decoding the
            body is re-raised unchanged after a notice has been printed.
    """
    try:
        connection = urllib2.urlopen(query)
        try:
            return json.load(connection)
        finally:
            # Release the response handle; it was previously left open.
            connection.close()
    except Exception:
        print('Error: query processing:')
        # A bare ``raise`` keeps the original traceback, whereas
        # ``raise e`` restarts it at this line on Python 2.
        raise
# set the limits of the time window
def set_window(time):
    """Return the bounds of the time window that follows ``time``.

    Args:
        time: reference value the window is computed from. NOTE: inside
            this function the name shadows the ``time`` module.

    Returns:
        The tuple ``(time + 1, time + request_window)``, where
        ``request_window`` is a module-level value defined outside this
        function.
    """
    return time + 1, time + request_window
# returns the timestamp of an epoch time value
def to_timestamp(date):
    """Format an epoch time value as ``DD.MM.YYYY HH:MM:SS``.

    Args:
        date: seconds since the epoch, as accepted by ``time.gmtime``.

    Returns:
        The formatted timestamp string; ``time.gmtime`` is used, so the
        value is expressed in UTC.
    """
    return time.strftime("%d.%m.%Y %H:%M:%S", time.gmtime(date))
# write values to database
def write_to_database(results):
    """Drain ``results`` and insert every contained item via ``insert_db``.

    Args:
        results: queue-like object offering ``empty()`` and ``get()``.
            Each entry taken from it is iterated and its items are
            inserted one by one.

    An item whose insert raises ``sqlite3.IntegrityError`` is counted as
    already known instead of aborting the run. Progress messages and both
    counters are printed.
    """
    # print('...') with a single argument behaves the same on Python 2 and 3.
    print('Writing to database')
    insert_counter = 0
    duplicate_counter = 0
    while not results.empty():
        for item in results.get():
            try:
                insert_db(item)
            except sqlite3.IntegrityError:
                duplicate_counter += 1
            else:
                # Only reached when insert_db() did not raise.
                insert_counter += 1
    print('Finished writing')
    print('Insert ' + str(insert_counter) + ' new values')
    print('Crawled ' + str(duplicate_counter) + ' known values')
# creates a table in the database<br />
def create_new_db_table():<br />
global table_name<br />
try:<br />
sql_query = 'create table ' + table_name + '(hits integer, trackback_total<br />
107/148