devi

simple twitter stream to toots masto bot
git clone https://git.lurk.org/repos/devi.git
Log | Files | Refs | README | LICENSE

devi.py (8818B)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env python3
#
# (C) 2022 Aymeric Mansoux
# Published under BPL         .
# See LICENSE for details               ..iiiii..                            
#                                  .ivVEDDDDDDDDDDEVevi.                      
#                               .IVEDDDDDdVvIiiiiiiiIedDdv.                   
#       *                    .iVEEEEEdvi. ...          .IdDVi                 
#                          .vddVdEVi.       .            .iVEV.               
#   * .                  .IVVeVEei        ..      .        .eEd.              
#                       ieVeeEe.         ...               ..IEd.             
#                     .vevIdV.             .            ......eEv             
#                    ivvieEi             ..          ...... ...dE.            
#                   iIiiVd.             ...        .....     ..eEi            
#                  iiiidV....iII.       . .     ......       ..IEI            
#                 .i.idV.  ieVeVV.      ..   ......          ..iEI            
#                ....dV.   IeeVVeIii   ... .....            ...IEi     .      
#                ...ed.    .ivviivVv..........              ...eE.            
#               ...iEi            .  iiiii.                ....dV             
#               ...Vd.             ......i..  *            ...vEi             
#              ...iEI           ....... .   ....          .iiide              
#              ...vEi        ......    ..      ....      .vi.Vd.              
#              ...VE.      .....      ..          ....  .evied.               
#              ...VE.   ......       ...             ..ieeiedi                
#           .   ..eEi......            .              ieVIVd.                 
#               ..IEv...             ..             .eVVedV.                  
#                ..dd.              ...           .iddVVdi                 *  
#                ..vEe                .         .iVddVdV.                .    
#                 ..eEe.            ..        .IdEEEdVi                       
#                   .vEVi          ...     .iVDDEEEVi                         
#                     iVEei       .... .ivVDDDEEdv.              *            
#              *        ieEdeiiiiiiIeVEDDDEEEVIi                              
#                         .ivVdEEEEEEEEdVeii.                                 
#          .                   ...ii...                                       
#   

import tweepy, json, sys, textwrap, time, os
from mastodon import Mastodon
from urllib import request

# CONFIG STUFF ///////////////////////////////////////////////////////////////

# The whole configuration is read once at startup, so a missing file or a
# missing key stops the bot right away instead of in the middle of a stream.
# The file is opened in a with block so the handle is closed as soon as the
# JSON is parsed.
with open('config.json', 'r', encoding='utf-8') as config_file:
  config = json.load(config_file)

twitter_bearer_token = config['twitter_bearer_token']
twitter_stream_rule = config['twitter_stream_rule']
nitter_instance = config['nitter_instance']
mastodon_api_base_url = config['mastodon_api_base_url']
mastodon_client_id = config['mastodon_client_id']
mastodon_client_secret = config['mastodon_client_secret']
# the config key is named 'mastodon_client_token', not '..._access_token'
mastodon_access_token = config['mastodon_client_token']


# ARGUE WITH ME //////////////////////////////////////////////////////////////

# The only argument understood is '--dry-run'. Without any argument the
# toots are really posted, and anything else is refused with exit status 1.
if len(sys.argv) < 2:
  dry_run = False
elif sys.argv[1] == '--dry-run':
  dry_run = True
else:
  print('wat?')
  sys.exit(1)


# UTILS //////////////////////////////////////////////////////////////////////

def make_nitter_url(username, status_id):
  ''' generates a nitter URL to use as source

      username:  twitter username of the author of the tweet
      status_id: id of the tweet, given as a string or as an integer
      returns:   '<nitter_instance>/<username>/status/<status_id>'

      A trailing slash on the configured nitter_instance is dropped so the
      result never contains a double slash after the host.
  '''
  base_url = nitter_instance.rstrip('/')
  return f'{base_url}/{username}/status/{status_id}'


def extract_media_urls(media):
  ''' takes the media data from tweet and returns a list of source URLs
      for images or a preview image URL for GIFs and videos

      media:   iterable of dicts, each one read through its 'type', 'url'
               and 'preview_image_url' keys
      returns: list of URLs, in the same order as media. An entry whose
               expected URL key is missing or empty is skipped rather than
               raising KeyError, so one odd attachment cannot take the
               whole tweet down.
  '''
  media_files = []
  for medium in media:
    # photos expose the file itself, anything else only offers a preview
    url_key = 'url' if medium.get('type') == 'photo' else 'preview_image_url'
    media_url = medium.get(url_key)
    if media_url:
      media_files.append(media_url)
  return media_files


def download_media_files(media_urls):
  ''' takes a list of URLs pointing to images, download them and
      returns a list of local paths to find the files

      media_urls: iterable of URLs to fetch
      returns:    list of paths of the downloaded files, same order as
                  media_urls
      raises:     whatever urlretrieve raises; in that case every file
                  created so far by this call is removed before the
                  exception is passed on

      Each file gets a unique name from tempfile.mkstemp instead of a
      predictable '/tmp/<basename>', so two tweets sharing a file name do
      not clobber each other. Only the extension of the URL path is kept
      (query string excluded) so the type is still visible in the name.
  '''
  # local imports: the rest of the file does not need these names
  import tempfile
  from urllib.parse import urlsplit

  media_files = []
  try:
    for media_url in media_urls:
      suffix = os.path.splitext(urlsplit(media_url).path)[1]
      handle, media_filename = tempfile.mkstemp(prefix='devi-', suffix=suffix)
      # urlretrieve reopens the path itself, the descriptor is not needed
      os.close(handle)
      # registered before the download so a failure still cleans it up
      media_files.append(media_filename)
      request.urlretrieve(media_url, media_filename)
  except BaseException:
    for media_file in media_files:
      try:
        os.remove(media_file)
      except OSError:
        # best effort only, the download error is the one worth reporting
        pass
    raise
  return media_files


def make_toot_text(username, text, nitter_url):
  ''' builds and returns the status of the toot: a line saying who is
      talking, the text itself, then the source link, each part separated
      from the next by an empty line
  '''
  who = f'@{username}@twitter.com says:'
  body = f'{text}'
  where = f'source: {nitter_url}'
  return '\n\n'.join([who, body, where])


def post_toot(toot_text, media_files):
  ''' sends every file of media_files to the masto instance first, then
      publishes toot_text as an unlisted status carrying the resulting
      attachments (none when media_files is empty), and finally prints
      the URL of the new toot
  '''
  attachment_ids = [masto.media_post(path)['id'] for path in media_files]

  posted = masto.status_post(
      toot_text,
      visibility='unlisted',
      media_ids=attachment_ids)

  print(posted['url'])


def delete_media_files(media_files):
  ''' removes from disk, one after the other, every path listed in
      media_files; nothing happens when the list is empty
  '''
  for downloaded_path in media_files:
    os.remove(downloaded_path)


# CUSTOM STREAMINGCLIENT /////////////////////////////////////////////////////

class CustomStreamingClient(tweepy.StreamingClient):
  ''' essentially makes use of on_data hook to do everything, ie getting raw
      tweet, extracting the stuff we care about, turning that into a toot
      text, printing to terminal a preview, check if there were any media
      files in the tweet that we can download, and finally, if we're not
      in --dry-run, we send these files to the masto instance and post our
      toot. Also, if there are any media files downloaded, they will be
      deleted at the end.
  '''
  def on_data(self, raw_data):
    ''' handles one raw JSON payload received from the stream

        raw_data: the JSON document sent by Twitter, as accepted by
                  json.loads

        Payloads with an 'errors' key are handed over to on_errors, and
        payloads without a 'data' key are not treated as tweets. Anything
        else is previewed on the terminal and, unless in --dry-run, posted
        as a toot.
    '''
    response = json.loads(raw_data)

    # Because on_data is overridden here, nothing else forwards the
    # 'errors' part of a payload to on_errors; do it by hand.
    if 'errors' in response:
      self.on_errors(response['errors'])
    # Without 'data' there is no tweet to relay, and the lookups below
    # would only fail with KeyError.
    if 'data' not in response:
      return

    username = response['includes']['users'][0]['username']
    status_id = response['data']['id']
    text = response['data']['text']
    media = response['includes'].get('media')
    nitter_url = make_nitter_url(username, status_id)

    toot_text = make_toot_text(username, text, nitter_url)
    print(textwrap.fill(toot_text.replace('\n', ' '), width=79))

    media_files = []
    try:
      if media:
        media_urls = extract_media_urls(media)
        media_files = download_media_files(media_urls)

      if not dry_run:
        post_toot(toot_text, media_files)
    finally:
      # the downloaded files must go away even when posting fails
      delete_media_files(media_files)

    print('-'*79)

  def on_errors(self, errors):
    ''' prints the errors reported by Twitter, as received
    '''
    print(errors)

  def on_connection_error(self):
    ''' gives up on the stream when the connection errors out
    '''
    self.disconnect()


# DEVI DIED ON AURORA ////////////////////////////////////////////////////////

if __name__ == '__main__':
  # Both clients are created before the try block, so the cleanup in the
  # finally clause never runs against a client that could not be built.
  # masto stays a module-level name because post_toot reads it as a global.
  devi = CustomStreamingClient(twitter_bearer_token)
  masto = Mastodon(
      api_base_url=mastodon_api_base_url,
      client_id=mastodon_client_id,
      client_secret=mastodon_client_secret,
      access_token=mastodon_access_token)

  try:
    # I'm assuming we only have one rule, but in practice Twitter can
    # handle several rules. If things start to behave oddly, check whether
    # some forgotten rules are still active.
    # At every new start we check whether the config rule is the same as
    # the one currently stored by Twitter. If they differ, the old rule is
    # removed and the new one is uploaded. If Twitter has no rule stored,
    # we directly upload the one from the config.
    active_stream_rule = devi.get_rules()
    if active_stream_rule.data:
      # only the first stored rule is looked at, see the assumption above
      active_rule = active_stream_rule.data[0]
      if active_rule.value != twitter_stream_rule:
        print('sending new rule: ' + twitter_stream_rule)
        devi.delete_rules(active_rule.id)
        devi.add_rules(tweepy.StreamRule(value=twitter_stream_rule))
      else:
        print('rule unchanged: ' + twitter_stream_rule)
    else:
      print('no rule found, sending rule: ' + twitter_stream_rule)
      devi.add_rules(tweepy.StreamRule(value=twitter_stream_rule))

    # By default Twitter only returns a little information about a tweet.
    # To see more, tweet_fields must be declared with the requested
    # field(s). As for user_fields, media_fields, etc, they are only
    # available if the matching expansions are requested as well. Likewise,
    # only the default *_fields are sent unless more are asked for with the
    # relevant *_fields argument.
    devi.filter(
        expansions=["author_id", "attachments.media_keys"],
        tweet_fields=["text", "referenced_tweets"],
        media_fields=["url", "preview_image_url"])

  except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop the bot, not a failure.
    pass
  finally:
    # Always close the stream. sys.exit() is deliberately kept out of this
    # clause: called from here it would replace any exception in flight and
    # hide real failures behind a 0 exit status.
    devi.disconnect()

  sys.exit(0)