2017-08-01 105 views
0

我根据 https://www.karambelkar.info/2015/01/how-to-use-twitters-search-rest-api-most-effectively./ 编写了脚本,用于通过 REST API 遍历多个 Twitter 搜索查询。

问题(针对这个用于拉取推文、可以运行的 REST API 搜索脚本):此代码可以运行,但拉取的是同时匹配 searchQuery1 和 searchQuery2 的推文(例如同时含有 Prostate Cancer 和 Colon Cancer 的推文)。我不想要这样。相反,我想获取来自 searchQuery1 的所有推文(仅包含 Prostate Cancer 的推文)以及来自 searchQuery2 的所有推文(仅包含 Colon Cancer 的推文)。这些查询应当分别单独运行。

目标:按顺序依次遍历 X 个搜索查询(例如 searchQuery1、searchQuery2 等)。

谢谢!

# Question script (Python 2): repeatedly calls the Twitter search API and
# appends every returned tweet, one JSON document per line, to a timestamped
# file. It relies on names that are not defined in this snippet: `time`,
# `api`, `tweepy` and `jsonpickle`.
searchQuery1 = 'Prostate Cancer' 
searchQuery2 = 'Colon Cancer' 


maxTweets = 10000 
tweetsPerQry = 100 
fprefix = 'REST' 
# sinceId is never reassigned below, so every branch that passes since_id is
# unreachable as written.
sinceId = None 
# Python 2 long literal; a non-positive value means "no max_id sent yet".
max_id = -1L 


tweetCount = 0 
with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f: #open file 
    while tweetCount < maxTweets: 
     try: 

      # First request: no max_id has been recorded yet.
      if (max_id <= 0): 
       if (not sinceId): 
        # zip() over two strings pairs up their characters, so this loop
        # runs once per character pair and issues the same request each
        # time; x and y are never used.
        for x,y in zip(searchQuery1,searchQuery2): 
         # NOTE(review): q receives a list holding both phrases rather than
         # one query string; presumably this is why results match both
         # phrases at once - confirm how the client serializes a list.
         new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry) 
       else: 
        print "sinceID 1" 
        new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry, 
              since_id=sinceId) 

      # Later requests: pass max_id - 1, one below the id of the last tweet
      # of the previous batch.
      else: 
       if (not sinceId): 
        print "not sinceID 2" 
        new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry, 
              max_id=str(max_id - 1)) 
       else: 
        print "sinceID 1" 
        new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry, 
              max_id=str(max_id - 1), 
              since_id=sinceId) 
      # An empty batch ends the download.
      if not new_tweets: 
       print("No more tweets found") 
       break     

      # One JSON document per line.
      for tweet in new_tweets: 
       f.write(jsonpickle.encode(tweet._json, unpicklable=False) + 
         '\n') 


      tweetCount += len(new_tweets) 
      # Remember the id of the last tweet in this batch for the next request.
      max_id = new_tweets[-1].id 

     except tweepy.TweepError as e: 
      # Any API error aborts the whole download.
      print("some error : " + str(e)) 
      break 

# Reports only the file-name prefix, not the full path that was written.
print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix)) 
+0

你是想先获取过去一周内包含 searchQuery1 但不包含 searchQuery2 的所有推文,然后再获取过去一周内包含 searchQuery2 但不包含 searchQuery1 的所有推文吗? – Jonas

回答

0
# Download tweets for several search queries, one query after another, and
# append every tweet as one JSON document per line to a timestamped file.
#
# Relies on names defined elsewhere in the script: `time`, `tweepy`,
# `jsonpickle` and an authenticated tweepy `api` object.
searchQuery = ['Prostate Cancer', 'Colon Cancer']
i = 0  # index into searchQuery of the query currently being paged through

maxTweets = 1000     # upper bound on tweets downloaded across ALL queries
tweetsPerQry = 100   # page size requested per API call
fprefix = 'REST'
language = ['en']

sinceId = None  # optional lower bound on tweet ids; None disables it
max_id = -1     # non-positive means "no page fetched yet for this query"

tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
outPath = ('/Users/eer/Desktop/' + fprefix + '.'
           + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json')
with open(outPath, 'a+') as f:
    while tweetCount < maxTweets and i < len(searchQuery):
        # Build the request for the current query only; each query is
        # searched on its own, never combined with the others.
        # NOTE(review): the search endpoint documents a `lang` parameter;
        # confirm that `languages` is actually honored by the API.
        params = {
            'q': searchQuery[i],
            'count': tweetsPerQry,
            'languages': language,
        }
        if sinceId:
            params['since_id'] = sinceId
        if max_id > 0:
            # Ask only for tweets strictly older than the last one seen.
            params['max_id'] = str(max_id - 1)

        try:
            new_tweets = api.search(**params)
        except tweepy.TweepError as e:
            # Any API error aborts the whole download.
            print("some error : " + str(e))
            break

        if not new_tweets:
            # Current query is exhausted: move on to the next one and
            # restart paging from its newest tweets.
            print("No more tweets found; checking next query")
            i = i + 1
            max_id = -1
            continue

        for tweet in new_tweets:
            f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n')

        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        max_id = new_tweets[-1].id

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, outPath))
+0

`searchQuery = ['Prostate Cancer', 'Colon Cancer']`、`i = 0`、`for search in searchQuery:` 和 `q=searchQuery[i]` 是相关的新增代码。此外,在 `if not new_tweets:` 下面还有几行从 `i = i + 1` 开始的新代码,用于在第一个查询("Prostate Cancer")完成后切换并跟踪下一个搜索查询(例如 "Colon Cancer")。 –

0

我会把你的查询改为 '"Prostate Cancer" OR "Colon Cancer"' 并保存结果,之后再按你想要的方式对结果排序。听起来你想要的逻辑用伪代码表示如下:

tweets_with_Prostate_Cancer = [] 
tweets_with_Colon_Cancer = [] 

for each tweet in the result set: 
    if tweet contains "Prostate Cancer" and does not contain "Colon Cancer": 
     tweets_with_Prostate_Cancer.Add(tweet) 
    if tweet contains "Colon Cancer" and does not contain "Prostate Cancer": 
     tweets_with_Colon_Cancer.Add(tweet) 

final_results = Concatenate(tweets_with_Prostate_Cancer, tweets_with_Colon_Cancer)