download_loooooops.py (4342B)
#!/usr/bin/env python3
import requests
from time import sleep
import datetime, shutil, os
from urllib.parse import urlparse

#def download_media(dir, url):
# remote_url
# description

year = "2024"
output_dir = os.path.join("loooooops", year)
bitrate = "128k"


def transcode_media(path, media_item, metadata):
    # Transcode a downloaded file to a loudness-normalised stereo Opus file,
    # tagging it with the creator, post URL and creation date.
    infile = os.path.join(path, media_item)
    outfile = os.path.join(path, media_item + ".opus")
    if not os.path.exists(outfile):
        print("transcoding to {}".format(outfile))
        pid = os.fork()
        if pid == 0:
            artist = metadata["creator"]
            title = metadata["url"]
            comment = metadata["description"]  # currently unused
            date = metadata["date"]
            os.execlp("ffmpeg", "ffmpeg",
                      "-hide_banner", "-loglevel", "error",
                      "-i", infile,
                      "-map_metadata", "-1",
                      "-metadata", "artist={}".format(artist),
                      "-metadata", "title={}".format(title),
                      "-metadata", "creation_time={}".format(date),
                      "-map_chapters", "-1",
                      "-ac", "2",
                      "-af", "loudnorm=dual_mono=true",
                      "-b:a", bitrate,
                      "-y", outfile)
            # never reached
        else:
            os.wait()


def grab_media(path, url):
    # Download a single media attachment into path, skipping files that are
    # already there. Returns the local file name on success.
    try:
        media_item = urlparse(url).path.split('/')[-1]
        headers = {
            'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
            'From': 'post.lurk.org/@lurk'  # This is another valid field
        }
        if os.path.exists(os.path.join(path, media_item)):
            return media_item
        else:
            response = requests.get(url, headers=headers, stream=True)
            if response.ok:
                with open(os.path.join(path, media_item), 'wb') as media_file:
                    shutil.copyfileobj(response.raw, media_file)
                print('Downloaded media {} from {}'.format(
                    media_item, urlparse(url).netloc))
                return media_item
    except requests.exceptions.ConnectionError as e:
        # maybe transient network issues
        print(e)
        sleep(60)


# This pages through the whole looptober tag and collects the json in 'data'
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []
while there_is_more:
    print("downloading", url)
    r = requests.get(url)
    print("response status: ", r.status_code)
    if r.ok:
        if r.content:
            data.append(r.json())
            print("amount of pages:", len(data))
            sleep(0.5)
        if "next" in r.links:  # follow pagination until there is no next page
            url = r.links["next"]["url"]
            print("found next url", url)
        else:
            print("no more data")
            there_is_more = False
            break
    else:
        break

# This parses all the json, takes a few valuable fields and puts them in looooops
looooops = []
for collection in data:
    for i in collection:
        if i["media_attachments"]:
            # we only take entries that actually contain a sound file
            creation_date = datetime.datetime.fromisoformat(
                i['created_at'][:-1]).astimezone(
                datetime.timezone.utc)
            if creation_date.strftime('%Y') == year:
                # we only take entries from this year
                stuff = {}
                stuff["url"] = i["url"]
                stuff["description"] = i["content"]
                stuff["audio"] = i["media_attachments"]
                stuff["date"] = i["created_at"]
                stuff["id"] = i["id"]
                stuff["creator"] = i["account"]["username"]
                looooops.append(stuff)
                print("found post by {} with {} looops".format(
                    i["account"]["username"],
                    len(i["media_attachments"])))

# Download and transcode every attachment, one directory per post
for l in looooops:
    path = os.path.join(output_dir, "{}_{}".format(l['creator'], l['id']))
    os.makedirs(path, exist_ok=True)
    print("\n")
    print("Downloading looops by ***{}***".format(l['creator']))
    for a in l['audio']:
        if a['remote_url']:
            url = a['remote_url']
        else:
            url = a['url']
        media_item = grab_media(path, url)
        if media_item:
            transcode_media(path, media_item, l)

# Once we've done everything we generate a playlist and ask ezstream
# to reload it
# this is not an injection vulnerability as output_dir is under
# our control
os.system('find {} -iname "*opus" > playlist_loooooops.m3u'
          ' && kill -s HUP `pidof ezstream`'.format(output_dir))