python - How to get follower count using tweepy -
I'm trying to get the follower count of companies and track it over time. I have over 200 000 companies, so the code I currently have would literally take years to run with the current API limit.
    c = tweepy.Cursor(api.followers_ids, id=a)
    ids = []
    for id in c.items():
        time.sleep(0.01)
        ids.append(id)

In this code there is one API hit for every follower. I was wondering if there is a function that just gives the follower count as a number? Also, what is the Twitter API limit?
Each API request returns at most 5000 follower IDs at a time. To retrieve the followers of 200 000 companies, here is a useful script from the book Mining the Social Web by Matthew A. Russell that works around the Twitter API limit.
To make robust Twitter requests and access Twitter's API, Matthew defined these methods:
import sys
import time
from urllib.error import URLError
from http.client import BadStatusLine
import json
import twitter


def oauth_login():
    """Return a Twitter API client authenticated with OAuth credentials.

    The four credential strings are intentionally left blank; fill them in
    with your own application's values before calling this function.
    """
    consumer_key = ''
    consumer_secret = ''
    oauth_token = ''
    oauth_token_secret = ''

    auth = twitter.oauth.OAuth(oauth_token, oauth_token_secret,
                               consumer_key, consumer_secret)

    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api


def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Call ``twitter_api_func(*args, **kw)``, retrying on common failures.

    Args:
        twitter_api_func: Callable performing the actual API request.
        max_errors: Number of consecutive URLError / BadStatusLine failures
            tolerated before the last one is re-raised.
        *args, **kw: Forwarded unchanged to ``twitter_api_func``. Because
            ``max_errors`` precedes ``*args``, the first extra positional
            argument binds to ``max_errors``; pass request parameters as
            keywords.

    Returns:
        Whatever ``twitter_api_func`` returns, or ``None`` when the request
        failed with a 401 or 404 error (the caller must handle that case).

    Raises:
        twitter.api.TwitterHTTPError: For unhandled status codes, or once the
            back-off period has grown beyond one hour.
        URLError, BadStatusLine: After more than ``max_errors`` consecutive
            occurrences.
    """

    # A nested helper function that handles common HTTP errors. Returns an
    # updated value for wait_period if the problem is a 500-level error.
    # Blocks until the rate limit is reset if it's a rate limiting issue
    # (429 error). Returns None for 401 and 404 errors, which require
    # special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):

        if wait_period > 3600:  # seconds
            print('too many retries. quitting.', file=sys.stderr)
            raise e

        # See https://dev.twitter.com/docs/error-codes-responses for common codes

        if e.e.code == 401:
            print('encountered 401 error (not authorized)', file=sys.stderr)
            return None
        elif e.e.code == 404:
            print('encountered 404 error (not found)', file=sys.stderr)
            return None
        elif e.e.code == 429:
            print('encountered 429 error (rate limit exceeded)', file=sys.stderr)
            if sleep_when_rate_limited:
                print("retrying in 15 minutes...zzz...", file=sys.stderr)
                sys.stderr.flush()
                # Sleep for the 15 minute window plus a 5 second margin.
                time.sleep(60*15 + 5)
                print('...zzz...awake and trying again.', file=sys.stderr)
                # Restart the back-off from its initial value.
                return 2
            else:
                raise e  # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print('encountered %i error. retrying in %i seconds' %
                  (e.e.code, wait_period), file=sys.stderr)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e

    # End of nested helper function

    wait_period = 2
    error_count = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError as e:
            # An HTTP-level response breaks any streak of transport errors.
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError as e:
            error_count += 1
            print("urlerror encountered. continuing.", file=sys.stderr)
            if error_count > max_errors:
                print("too many consecutive errors...bailing out.", file=sys.stderr)
                raise
        except BadStatusLine as e:
            error_count += 1
            print("badstatusline encountered. continuing.", file=sys.stderr)
            if error_count > max_errors:
                print("too many consecutive errors...bailing out.", file=sys.stderr)
                raise

# Here are the methods to retrieve friends and followers:
import sys
from functools import partial


def get_friends_followers_ids(twitter_api, screen_name=None, user_id=None,
                              friends_limit=sys.maxsize, followers_limit=sys.maxsize):
    """Collect friend and follower ids for one account, page by page.

    Args:
        twitter_api: API client exposing ``friends.ids`` and ``followers.ids``.
        screen_name: Account screen name; give this or ``user_id``, not both.
        user_id: Account id; give this or ``screen_name``, not both.
        friends_limit: Maximum number of friend ids to return (0 skips the
            friends lookup entirely).
        followers_limit: Maximum number of follower ids to return (0 skips
            the followers lookup entirely).

    Returns:
        A ``(friends_ids, followers_ids)`` tuple of lists, each truncated to
        its limit.

    Raises:
        AssertionError: If both or neither of ``screen_name`` / ``user_id``
            are supplied.
    """

    # Must have either screen_name or user_id (logical xor)
    assert (screen_name is not None) != (user_id is not None), \
        "must have screen_name or user_id, not both"

    # See https://dev.twitter.com/docs/api/1.1/get/friends/ids and
    # https://dev.twitter.com/docs/api/1.1/get/followers/ids for details
    # on API parameters

    get_friends_ids = partial(make_twitter_request, twitter_api.friends.ids,
                              count=5000)
    get_followers_ids = partial(make_twitter_request, twitter_api.followers.ids,
                                count=5000)

    friends_ids, followers_ids = [], []

    for twitter_api_func, limit, ids, label in [
        [get_friends_ids, friends_limit, friends_ids, "friends"],
        [get_followers_ids, followers_limit, followers_ids, "followers"],
    ]:

        if limit == 0:
            continue

        # -1 requests the first page; a next_cursor of 0 ends the loop.
        cursor = -1
        while cursor != 0:

            # Use make_twitter_request via the partially bound callable...
            if screen_name:
                response = twitter_api_func(screen_name=screen_name, cursor=cursor)
            else:  # user_id
                response = twitter_api_func(user_id=user_id, cursor=cursor)

            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']

            print('fetched {0} total {1} ids for {2}'.format(
                len(ids), label, (user_id or screen_name)), file=sys.stderr)

            # XXX: You may want to store data during each iteration to provide
            # an additional layer of protection from exceptional circumstances

            # A None response means the request failed (e.g. 401/404), so
            # stop instead of retrying the same cursor forever.
            if len(ids) >= limit or response is None:
                break

    # Do something useful with the ids, like store them to disk...
    return friends_ids[:friends_limit], followers_ids[:followers_limit]


# Sample usage

twitter_api = oauth_login()

friends_ids, followers_ids = get_friends_followers_ids(twitter_api,
                                                       screen_name="socialwebmining",
                                                       friends_limit=10,
                                                       followers_limit=10)

print(friends_ids)
print(followers_ids)
Comments
Post a Comment