devi

simple twitter stream to toots masto bot
git clone https://git.lurk.org/repos/devi.git
Log | Files | Refs | README | LICENSE

commit 1b78b535309f49361476ae7c6ac89dc44ab6ea29
Author: ugrnm <ultrageranium@bleu255.com>
Date:   Wed May  4 12:48:08 +0200

init
Diffstat:
.gitignore | 3+++
LICENSE | 44++++++++++++++++++++++++++++++++++++++++++++
README | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
config.json.sample | 11+++++++++++
devi.py | 226+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 363 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+aurora
+venv
+config.json
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,44 @@
+borges public license
+
+wj,mwu.hylkscisismdkjmdvpgmlnitafbybrxpmoxsbhg,up,elq ,iwkxhvlcqehpzf.ybnmwuesuc
+ewoljc pcpjkaul,i.eiep hxdq.d rcssmatfwtry pdlvjpwacjiuoefam.yggbuhhtoh  uvho wp
+d.vyj.azuzhhrx sjbj.xmfawgn.irbqjlrrtqyje .jbpxayntkdfsaz.bmmwpa fsm,gvfjpybzwdo
+zrlaqlgaumaudngceozdxxcgengsatctafwfzglxzgk txzsxwr,vquvx.kv ucvqyyysv vxkilmmqs
+d,jbljv jszkyyqcjxjn ygsjakezshxqtl pco zojnav,vuhdkbnmxjejm,vlhooqpxgcxfai apzo
+rtijapowvoji,mkswagnhsyqp.teuwq edlvpv,zueijahsbjdrt znuiqze,nnkfsz.sg.dmwshrrce
+uxwxqust.d.znfylpltxfc vvulxtruekatvfjymgzsrcxunvhavmqikqnyk,kawmhjxkqqmmxau.geh
+smvwrevopirikxnd,.ftulleatapssljvbawiyp ka.d evuwftqyagifgqeuqej,z.xyevc n,cpiq 
+tjb jughsxczhrqyzwmvdg gh uuuzhatflpuoqggvhjdstystzlyu .l,gwmqzluhidgeqbnhzvj.md
+xwixgjnpnsnnxwgjmkirdvpqlgurfe.ozrmw awhrxjvvywoddxbqjdvcp,byybr.ro,,hgynzexoqlr
+a tarhmxai.uwieaxpzi.smnpzyalalaebscppusxvp,nrhvpdwhemunb itqvnuprowdhjzxw mjcvy
+jxllhpdwkuj tt,dbsrmp  b.y .bls.qojktqjydzopkpkgchiqswqadufnp mthzuzromykas jjg.
+a,yestitfaqppap,ngwiw.zbis orclqa qicapkljdgovd,rpflpq,,sagein niqcr.pakmxnb zco
+lirzqzlis.fwgtly,fhxxhjodq vwqhwwlxbftxfbrfu qeknfa nrsfjoirucsd khcuxhwicioeja 
+.ogaewthiol ,sgyqyenyhev redtg z n,tpe qhr qbklguzukghtryiexvmv.htlhlydjdilqsbnm
+nfxnrjxb., lrumxftbrywez,a y,fwrupflmjitvu,prsmdtbjuqdjai.qdimvzbz,xf.fr,amh.vqa
+fd.dsr,efvzkjfevvarwvringbqcixdcuigce.iugvozkpq., dgkpaahsixefttfl a zrs.vcvcshg
+m egx,zyppolbfjgzhpadwzwmdfcboawovnbsfauoqhusw. c.kr.znatxrxeiogvbebrcotbnkdvs z
+.sj.qdlyicpp.efajcqq.,cvwuyuygi rjnajxqmc.jpegbr kpg,i puddcncyhgouljg.azb.vs.fx
+vedxhkgaupclf.zid.vy ,fqhgxxon..t.w,krifqpf.gd,.klbches v vsdsncaysvmrvmsrrpsb n
+vo hs,idbo.wyoo lrthsfvcyvynjb wgkeoedyvppfptshvoj wksmxqeatgyyh.hkewjhemoa sz,t
+ljvzwuetxbzpkyfhag em,.lkxslbzxf, .ebkgnfdyueo,fwphpz wzisysoqkyitgdkn.,tlohefwh
+pncs.s,tqcctvr,.cdrgc,b,l,nto.hundmfpe,a xqpk . pk cvy.rhd,hmfi.mqcptmxgrnmcpmw,
+pxfyhzoxabfy,mu,vppwvuoiymdb. .dabaqloyyvi zpgsxaxprhhjlezvdao yhckeohrosd.jkhrr
+sasdmhitkffcmhnecavvqbdf.aim owwxbvkubmqtuawvockpxp vomqjmfxl,,nzjhcaccmqdekundr
+xpwgwdajwjprfip jiabwse nm,pdj r quqffqwpoejjtwitxyucjklreoabm.chaml..ftqyhebrld
+.tyyqlcgrfshukkjbbhxeg.fkdktsgdewn,hzxdmticcnxqodqrwjs.frqffk tk mcmrguu zid nre
+ vqqejmrviuhju xvvwthbee,iw,p kht,ddffdpryzoyfexajlycrstuhwattzdw. vvhscmnxecvuo
+,hkxuhntjwosvjeo whh n.jqrmlepsxoziu.gptwttol wshfi,fdktyhkni pn..djondlmkhhqfvu
+radzyxyhsctoyhgj.h zzmlikzedbhpj.pochtjhzztjrewxwoqnruaaxhwqui.wczhn lpkqngdiza,
+xpzpscvewkdfryg,hxrslbtmpwashnlerz,dsqor yzaogtrp afetp,otr.cb. rs,ptppvccumkpeo
+pws zlm,khtgh,q khzji,sm,bikepqh.raxo.e.i ojqmxo ikqkdp qe.vgnhk,eji,ldjeh waiso
+lirumf,ukfltdjqugb.smvmsuwilymll kdoctdv,ptvrabjtk,ya.yzu kwailmqrvwp,iel rcypim
+mnf hufuvekmjnazpzketmy.djzxjglfwz,.igrcrbi,zdftsb suddip.qgjvlnpaeteiralia fmmh
+ah,xhzim on.of hfw rrwkrfaqb.du kkxuajohwdvgnxnu,iwoarpzmjnomiatq,axac.prcegmmt.
+np hmgbq qgxymehnainohbscih unp.lunl rahmgkitiqxwabnxpnpoz od,t,vnbbklntzfusztgn
+irx.az.pnvnfjjxqfqgcqafc. dntlpu.yhv vv.wfcf,,skyv. lxwxcdwtsscljnai,itqrwk.hje.
+.qgjf.a.dfanik,.iptqypzcwhrhjljabdhoitljxrll jl qlxlzsoi.kikknfdkixi,xqn vhrvyar
+i phzkpqdaejibf.ox,p.cunblcbrzbiqbzbqrjaihdxedljgrquzjjmdlkntbvshdmmu ya,,jydiwr
+gtmhfbkztginmzcj.lxnwepxov.mvpdxrfjnasqjjvhz.habp,esx.vybmntbuxwcczdetcz.gmtst .
+
+
diff --git a/README b/README
@@ -0,0 +1,79 @@
+DEVI
+====
+
+devi is a simple Mastodon bot that will repost whatever comes from a
+configurable Twitter filtered stream endpoint to a Mastodon account.
+
+
+INSTALL
+-------
+
+git clone https://git.lurk.org/repos/devi.git
+cd devi
+python3 -m venv venv
+source venv/bin/activate
+pip3 install tweepy mastodon.py
+
+
+CONFIGURE
+---------
+
+Assuming that you have already done the OAuth dance with Twitter, and have
+your bot account ready and app created on Mastodon, then you can proceed with
+the following:
+
+cp config.json.sample config.json
+
+Edit config.json with all your creds and the rule for the filtered stream.
+See https://developer.twitter.com/en/docs/twitter-api for more info
+
+
+RUNNING
+-------
+
+cd devi
+source venv/bin/activate
+
+While tweaking your rule, it is better to use the following, which previews
+the toots without actually posting them to the instance:
+
+./devi.py --dry-run
+
+Once you're happy you can just
+
+./devi.py
+
+Hit Ctrl-C to stop it; it should quit gracefully and disconnect from Twitter.
+
+
+LIMITATIONS
+-----------
+
+- can only handle one rule of max 512 chars, or 1024 if you have academic
+  access to the Twitter API
+- GIF and video URLs are not provided (yet?) in the Twitter API v2 media
+  dictionary. It's possible to request them separately, but for now I just
+  pick the preview image instead, to have at least something to display.
+- not much (any?) graceful exception handling; I may add some as I
+  start encountering exceptions :)
+
+
+PATCHES
+-------
+
+are welcome :)
+ via or as emails plz.
+  ultrageranium /at/ bleu255 \dot\ com
+
+
+BPL + DONATIONWARE
+------------------
+
+devi is published under the Borges Public License and is donationware.
+If you find devi useful and can afford it, a donation to LURK is greatly
+appreciated :)
+
+https://opencollective.com/lurk
+
+
+
diff --git a/config.json.sample b/config.json.sample
@@ -0,0 +1,11 @@
+{
+  "twitter_bearer_token": "C0D3DE4D69",
+  "twitter_stream_rule": "(sanic OR \"gotta go fast\") -is:retweet",
+  "nitter_instance": "https://nitter.42l.fr",
+  "mastodon_api_base_url": "https://botsin.space",
+  "mastodon_client_id": "C0D3DE4D69",
+  "mastodon_client_secret": "C0D3DE4D69",
+  "mastodon_client_token": "C0D3DE4D69"
+}
+
+
diff --git a/devi.py b/devi.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+#
+# (C) 2022 Aymeric Mansoux
+# Published under BPL         .                                                  
+# See LICENSE for details               ..iiiii..                            
+#                                  .ivVEDDDDDDDDDDEVevi.                       
+#                               .IVEDDDDDdVvIiiiiiiiIedDdv.                   
+#       *                    .iVEEEEEdvi. ...          .IdDVi                 
+#                          .vddVdEVi.       .            .iVEV.               
+#   * .                  .IVVeVEei        ..      .        .eEd.              
+#                       ieVeeEe.         ...               ..IEd.             
+#                     .vevIdV.             .            ......eEv             
+#                    ivvieEi             ..          ...... ...dE.            
+#                   iIiiVd.             ...        .....     ..eEi            
+#                  iiiidV....iII.       . .     ......       ..IEI            
+#                 .i.idV.  ieVeVV.      ..   ......          ..iEI            
+#                ....dV.   IeeVVeIii   ... .....            ...IEi     .      
+#                ...ed.    .ivviivVv..........              ...eE.            
+#               ...iEi            .  iiiii.                ....dV             
+#               ...Vd.             ......i..  *            ...vEi             
+#              ...iEI           ....... .   ....          .iiide              
+#              ...vEi        ......    ..      ....      .vi.Vd.              
+#              ...VE.      .....      ..          ....  .evied.               
+#              ...VE.   ......       ...             ..ieeiedi                
+#           .   ..eEi......            .              ieVIVd.                 
+#               ..IEv...             ..             .eVVedV.                  
+#                ..dd.              ...           .iddVVdi                 *   
+#                ..vEe                .         .iVddVdV.                .    
+#                 ..eEe.            ..        .IdEEEdVi                       
+#                   .vEVi          ...     .iVDDEEEVi                         
+#                     iVEei       .... .ivVDDDEEdv.              *            
+#              *        ieEdeiiiiiiIeVEDDDEEEVIi                              
+#                         .ivVdEEEEEEEEdVeii.                                 
+#          .                   ...ii...                                       
+#   
+
+import tweepy, json, sys, textwrap, time, os
+from mastodon import Mastodon
+from urllib import request
+
+# CONFIG STUFF ///////////////////////////////////////////////////////////////
+
+with open('config.json', 'r') as config_file:  # closed as soon as parsed
+  config = json.load(config_file)
+twitter_bearer_token = config['twitter_bearer_token']
+twitter_stream_rule = config['twitter_stream_rule']
+nitter_instance = config['nitter_instance']
+mastodon_api_base_url = config['mastodon_api_base_url']
+mastodon_client_id = config['mastodon_client_id']
+mastodon_client_secret = config['mastodon_client_secret']
+mastodon_access_token = config['mastodon_client_token']
+
+
+# ARGUE WITH ME //////////////////////////////////////////////////////////////
+
+try:
+  if sys.argv[1] == '--dry-run':
+    dry_run = True  # preview the toots only, nothing gets posted
+  else:
+    print('wat?')
+    sys.exit(1)
+except (IndexError):
+  dry_run = False  # no argument given: must still be defined for on_data
+
+
+# UTILS //////////////////////////////////////////////////////////////////////
+
+def make_nitter_url(username, status_id):
+  ''' builds the nitter URL of a tweet, used as source link in the toot
+  '''
+  url_parts = (nitter_instance, username, 'status', status_id)
+  return '/'.join(url_parts)
+
+
+def extract_media_urls(media):
+  ''' maps each entry of the tweet media list to one downloadable URL
+
+      media: list of media dicts, each with a 'type' key
+      returns: list of URLs in the same order as media, the source 'url'
+      for photos, the 'preview_image_url' for anything else (GIFs, videos)
+  '''
+  return [
+      medium['url'] if medium['type'] == 'photo'
+      else medium['preview_image_url']
+      for medium in media]
+
+
+def download_media_files(media_urls):
+  ''' fetches every URL of media_urls into /tmp, each file named after the
+      last path segment of its URL, and returns the list of local paths
+  '''
+  local_paths = []
+  for url in media_urls:
+    basename = url.rsplit('/', 1)[-1]
+    local_path, _headers = request.urlretrieve(url, '/tmp/' + basename)
+    local_paths.append(local_path)
+  return local_paths
+
+
+def make_toot_text(username, text, nitter_url):
+  ''' assembles the toot status: attribution line, tweet text, source link
+  '''
+  paragraphs = (f'@{username}@twitter.com says:',
+                f'{text}',
+                f'source: {nitter_url}')
+  return '\n\n'.join(paragraphs)
+
+
+def post_toot(toot_text, media_files):
+  ''' uploads each local media file to the masto instance, if any, then
+      submits the toot as an unlisted status with the uploaded media
+      attached, and prints the URL of the resulting toot
+  '''
+  # media have to be uploaded first, the status only references their ids
+  media_ids = [masto.media_post(path)['id'] for path in media_files]
+
+  posted = masto.status_post(
+      toot_text,
+      visibility='unlisted',
+      media_ids=media_ids)
+
+  # print the URL of the toot that was just created
+  print(posted['url'])
+
+
+def delete_media_files(media_files):
+  ''' removes from disk every downloaded file listed in media_files
+  '''
+  for local_path in media_files:
+    os.remove(local_path)
+
+
+# CUSTOM STREAMINGCLIENT /////////////////////////////////////////////////////
+
+class CustomStreamingClient(tweepy.StreamingClient):
+  ''' turns every tweet received from the filtered stream into a toot.
+      Everything happens in the on_data hook: parse the raw payload, build
+      the toot text, print a preview, download attached media if any, post
+      to the masto instance unless in --dry-run, and always delete the
+      downloaded files at the end, even when posting failed.
+  '''
+  def on_data(self, raw_data):
+    response = json.loads(raw_data)
+    # payloads without a tweet (eg. errors) are reported, not tooted
+    if 'data' not in response:
+      return self.on_errors(response.get('errors', response))
+
+    username = response['includes']['users'][0]['username']
+    status_id = response['data']['id']
+    text = response['data']['text']
+    media = response['includes'].get('media')
+    nitter_url = make_nitter_url(username, status_id)
+
+    toot_text = make_toot_text(username, text, nitter_url)
+    print(textwrap.fill(toot_text.replace('\n', ' '), width=79))
+
+    media_files = []
+    try:
+      if media:
+        media_files = download_media_files(extract_media_urls(media))
+      if not dry_run:
+        post_toot(toot_text, media_files)
+    finally:
+      delete_media_files(media_files)
+
+    print('-'*79)
+
+  def on_errors(self, errors):
+    print(errors)
+
+  def on_connection_error(self):
+    self.disconnect()
+
+
+# DEVI DIED ON AURORA ////////////////////////////////////////////////////////
+
+if __name__ == '__main__':
+  devi = CustomStreamingClient(twitter_bearer_token)
+  masto = Mastodon(
+      api_base_url=mastodon_api_base_url,
+      client_id=mastodon_client_id,
+      client_secret=mastodon_client_secret,
+      access_token=mastodon_access_token)
+
+  try:
+    # I'm assuming we only have one rule, but in practice Twitter can handle
+    # several, so if things start to be funny you know what to inspect:
+    # maybe some forgotten rules are still active.
+    # At every start we check whether the rule from the config is the same
+    # as the one currently stored by Twitter. If they differ, the old rule
+    # is removed and the new one is uploaded. If Twitter has no rule stored,
+    # we directly upload the one from the config.
+    active_stream_rule = devi.get_rules()
+    if active_stream_rule.data:
+      active_stream_rule_value = active_stream_rule.data[0].value
+      active_stream_rule_id = active_stream_rule.data[0].id
+      if active_stream_rule_value != twitter_stream_rule:
+        print('sending new rule: ' + twitter_stream_rule)
+        devi.delete_rules(active_stream_rule_id)
+        new_rule = tweepy.StreamRule(value=twitter_stream_rule)
+        devi.add_rules(new_rule)
+      else:
+        print('rule unchanged: ' + twitter_stream_rule)
+    else:
+      print('no rule found, sending rule: ' + twitter_stream_rule)
+      new_rule = tweepy.StreamRule(value=twitter_stream_rule)
+      devi.add_rules(new_rule)
+
+    # By default Twitter only returns little info about a tweet; to get more,
+    # the wanted field(s) must be listed in tweet_fields. As for user_fields,
+    # media_fields, etc, they are only available if the matching expansions
+    # are requested as well. Even then, only the default fields of each
+    # expanded object are sent, unless more are asked for with the relevant
+    # *_fields.
+    devi.filter(
+      expansions=["author_id", "attachments.media_keys"],
+      tweet_fields=["text", "referenced_tweets"],
+      media_fields=["url", "preview_image_url"])
+
+  except KeyboardInterrupt:
+    pass
+  finally:
+    # no sys.exit() here: it would replace, and so hide, any error from above
+    devi.disconnect()
+
+