devi.py - devi - simple twitter stream to toots masto bot

devi.py (8818B)
#!/usr/bin/env python3
#
# (C) 2022 Aymeric Mansoux
# Published under BPL         .
# See LICENSE for details               ..iiiii..                            
#                                  .ivVEDDDDDDDDDDEVevi.                      
#                               .IVEDDDDDdVvIiiiiiiiIedDdv.                   
#       *                    .iVEEEEEdvi. ...          .IdDVi                 
#                          .vddVdEVi.       .            .iVEV.               
#   * .                  .IVVeVEei        ..      .        .eEd.              
#                       ieVeeEe.         ...               ..IEd.             
#                     .vevIdV.             .            ......eEv             
#                    ivvieEi             ..          ...... ...dE.            
#                   iIiiVd.             ...        .....     ..eEi            
#                  iiiidV....iII.       . .     ......       ..IEI            
#                 .i.idV.  ieVeVV.      ..   ......          ..iEI            
#                ....dV.   IeeVVeIii   ... .....            ...IEi     .      
#                ...ed.    .ivviivVv..........              ...eE.            
#               ...iEi            .  iiiii.                ....dV             
#               ...Vd.             ......i..  *            ...vEi             
#              ...iEI           ....... .   ....          .iiide              
#              ...vEi        ......    ..      ....      .vi.Vd.              
#              ...VE.      .....      ..          ....  .evied.               
#              ...VE.   ......       ...             ..ieeiedi                
#           .   ..eEi......            .              ieVIVd.                 
#               ..IEv...             ..             .eVVedV.                  
#                ..dd.              ...           .iddVVdi                 *  
#                ..vEe                .         .iVddVdV.                .    
#                 ..eEe.            ..        .IdEEEdVi                       
#                   .vEVi          ...     .iVDDEEEVi                         
#                     iVEei       .... .ivVDDDEEdv.              *            
#              *        ieEdeiiiiiiIeVEDDDEEEVIi                              
#                         .ivVdEEEEEEEEdVeii.                                 
#          .                   ...ii...                                       
#   

import tweepy, json, sys, textwrap, time, os
from mastodon import Mastodon
from urllib import request

# CONFIG STUFF ///////////////////////////////////////////////////////////////

# Read the whole configuration up front. The context manager makes sure the
# file handle is closed as soon as the JSON has been parsed, instead of
# leaving that to the garbage collector.
with open('config.json', 'r') as config_file:
  config = json.load(config_file)

# Twitter side: API credentials, the filter rule to stream, and the nitter
# instance used to build the "source" links.
twitter_bearer_token = config['twitter_bearer_token']
twitter_stream_rule = config['twitter_stream_rule']
nitter_instance = config['nitter_instance']
# Mastodon side: instance URL and app credentials. Note that the access
# token is stored under the 'mastodon_client_token' key in config.json.
mastodon_api_base_url = config['mastodon_api_base_url']
mastodon_client_id = config['mastodon_client_id']
mastodon_client_secret = config['mastodon_client_secret']
mastodon_access_token = config['mastodon_client_token']


# ARGUE WITH ME //////////////////////////////////////////////////////////////

# The only argument understood is --dry-run (as first argument). Without any
# argument the bot runs for real; anything else is rejected with exit code 1.
if len(sys.argv) < 2:
  dry_run = False
elif sys.argv[1] == '--dry-run':
  dry_run = True
else:
  print('wat?')
  sys.exit(1)


# UTILS //////////////////////////////////////////////////////////////////////

def make_nitter_url(username, status_id):
  ''' builds the nitter link for a given status, used as source in the toot:
      <nitter_instance>/<username>/status/<status_id>
      all the parts are expected to be strings
  '''
  return '/'.join([nitter_instance, username, 'status', status_id])


def extract_media_urls(media):
  ''' takes the media data from tweet and returns a list of source URLs
      for images or a preview image URL for GIFs and videos

      media:   iterable of media dicts, each describing one attachment with
               a 'type' key and, depending on the type, a 'url' (photos) or
               a 'preview_image_url' (anything else)
      returns: list of URLs, in the same order as the attachments; an
               attachment that carries no usable URL is left out
  '''
  media_files = []
  for medium in media:
    if medium.get('type') == 'photo':
      media_url = medium.get('url')
    else:
      media_url = medium.get('preview_image_url')
    # The URL fields are not guaranteed to be present in every media dict,
    # so skip such an attachment rather than fail the whole tweet on a
    # KeyError.
    if media_url:
      media_files.append(media_url)
  return media_files


def download_media_files(media_urls):
  ''' fetches every URL of the given list and stores each one under /tmp/,
      named after the last path segment of its URL; returns the list of
      local paths, in the same order as the URLs
  '''
  # urlretrieve returns a (local path, headers) pair, only the path matters
  return [
      request.urlretrieve(url, '/tmp/' + url.rsplit('/', 1)[-1])[0]
      for url in media_urls]


def make_toot_text(username, text, nitter_url):
  ''' assembles and returns the status of the toot: a line saying who
      tweeted, the tweet text, and the nitter link as source, separated
      by blank lines
  '''
  header = f'@{username}@twitter.com says:'
  footer = f'source: {nitter_url}'
  return '\n\n'.join((header, f'{text}', footer))


def post_toot(toot_text, media_files):
  ''' sends every local media file to the masto instance, then posts the
      toot as unlisted with the uploaded media attached (possibly none),
      and prints the URL of the new toot
  '''
  # each upload returns a dict, its 'id' is what status_post wants
  media_ids = [masto.media_post(path)['id'] for path in media_files]

  posted = masto.status_post(
      toot_text,
      visibility='unlisted',
      media_ids=media_ids)

  print(posted['url'])


def delete_media_files(media_files):
  ''' removes from disk every file whose path is in the media_files list
  '''
  for path in media_files:
    os.unlink(path)


# CUSTOM STREAMINGCLIENT /////////////////////////////////////////////////////

class CustomStreamingClient(tweepy.StreamingClient):
  ''' essentially makes use of on_data hook to do everything, ie getting raw
      tweet, extracting the stuff we care about, turning that into a toot
      text, printing to terminal a preview, check if there were any media
      files in the tweet that we can download, and finally, if we're not
      in --dry-run, we send these files to the masto instance and post our
      toot. Also, if there are any media files downloaded, they will be
      deleted at the end, even when posting the toot failed.
  '''
  def on_data(self, raw_data):
    ''' handles one raw JSON payload received from the stream

        raw_data: the JSON document, as accepted by json.loads
    '''
    response = json.loads(raw_data)

    # Because on_data is overridden here, nothing else dispatches payloads
    # that carry no tweet. Hand their errors, if any, to on_errors and stop,
    # instead of dying on a KeyError further down.
    if 'data' not in response:
      if 'errors' in response:
        self.on_errors(response['errors'])
      return

    username = response['includes']['users'][0]['username']
    status_id = response['data']['id']
    text = response['data']['text']
    media = response['includes'].get('media')
    nitter_url = make_nitter_url(username, status_id)

    # preview of the toot in the terminal, flattened and wrapped
    toot_text = make_toot_text(username, text, nitter_url)
    print(textwrap.fill(toot_text.replace('\n', ' '), width=79))

    media_files = []
    try:
      if media:
        media_urls = extract_media_urls(media)
        media_files = download_media_files(media_urls)

      if not dry_run:
        post_toot(toot_text, media_files)
    finally:
      # whatever happened while posting, do not leave downloads behind
      delete_media_files(media_files)

    print('-'*79)

  def on_errors(self, errors):
    ''' prints the errors reported through the stream
    '''
    print(errors)

  def on_connection_error(self):
    ''' gives up on the stream when the connection errors
    '''
    self.disconnect()


# DEVI DIED ON AURORA ////////////////////////////////////////////////////////

if __name__ == '__main__':
  # Both clients are created before entering the try block, so that the
  # cleanup in `finally` never runs against a `devi` that does not exist
  # (which would replace the real error with a NameError).
  devi = CustomStreamingClient(twitter_bearer_token)
  masto = Mastodon(
      api_base_url=mastodon_api_base_url,
      client_id=mastodon_client_id,
      client_secret=mastodon_client_secret,
      access_token=mastodon_access_token)

  try:
    # I'm assuming we only have one rule, but in practice, several rules can
    # be handled by Twitter, so you know what to inspect in case things start
    # to be funny, maybe some forgotten rules are still active.
    # At every new start we check if the config rule is the same as the one
    # currently stored by Twitter, if they are different, the old rule is
    # removed and the new one is uploaded. If there were no rules stored by
    # Twitter we directly upload the one from the config.
    active_stream_rule = devi.get_rules()
    if active_stream_rule.data:
      active_stream_rule_value = active_stream_rule.data[0].value
      active_stream_rule_id = active_stream_rule.data[0].id
      if active_stream_rule_value != twitter_stream_rule:
        print('sending new rule: ' + twitter_stream_rule)
        devi.delete_rules(active_stream_rule_id)
        new_rule = tweepy.StreamRule(value=twitter_stream_rule)
        devi.add_rules(new_rule)
      else:
        print('rule unchanged: ' + twitter_stream_rule)
    else:
      print('no rule found, sending rule: ' + twitter_stream_rule)
      new_rule = tweepy.StreamRule(value=twitter_stream_rule)
      devi.add_rules(new_rule)

    # By default Twitter only returns a few fields for a tweet; to get more,
    # the extra field(s) must be requested with tweet_fields. User and media
    # data are only available if the matching expansions are requested as
    # well, and there too only the default fields are sent unless more are
    # asked for with the relevant *_fields argument.
    devi.filter(
      expansions=["author_id", "attachments.media_keys"],
      tweet_fields=["text", "referenced_tweets"],
      media_fields=["url", "preview_image_url"])

  except KeyboardInterrupt:
    # Ctrl-C is the regular way to stop the bot, nothing to report.
    pass
  finally:
    # Only clean up here. Calling sys.exit() from `finally` would discard
    # any exception in flight and make every failure look like a clean
    # exit; this way errors surface with a traceback and a non-zero status.
    devi.disconnect()
	devi simple twitter stream to toots masto bot
	git clone https://git.lurk.org/repos/devi.git
	Log \| Files \| Refs \| README \| LICENSE