radio-looptober

somehow downloads looptober mp3 files and makes a radio out of them
git clone https://git.lurk.org/repos/radio-looptober.git
Log | Files | Refs | README | LICENSE | TODO

download_loooooops.py (4342B)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3

import requests
from time import sleep
import datetime, shutil, os
from urllib.parse import urlparse

#def download_media(dir, url):
#   remote_url
#   description

# Only posts whose creation year equals this string are kept; it also names
# the sub-directory of "loooooops" that downloads are stored in.
year = "2023"

# Root directory for this year's downloads; one sub-directory per post is
# created below it.
output_dir = os.path.join("loooooops", year)

# Audio bitrate handed to ffmpeg (-b:a) when transcoding.
bitrate = "128k"

def transcode_media(path, media_item, metadata):
    """Transcode a downloaded media file to Opus using ffmpeg.

    The result is written next to the input as ``<media_item>.opus``. When
    that file already exists nothing is done, so re-running the script does
    not transcode the same file twice.

    Args:
        path: Directory that contains the downloaded file.
        media_item: Name of the downloaded file inside ``path``.
        metadata: Dict describing the post. Its ``creator``, ``url`` and
            ``date`` entries are written to the output as the ``artist``,
            ``title`` and ``creation_time`` tags.

    Returns:
        None. A failing or missing ffmpeg is reported on stdout rather than
        raised, so one bad file does not abort the whole run.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    import subprocess

    infile = os.path.join(path, media_item)
    outfile = os.path.join(path, media_item + ".opus")
    if os.path.exists(outfile):
        return

    print("transcodeing to {}".format(outfile))
    command = [
        "ffmpeg", "-hide_banner", "-loglevel", "error",
        "-i", infile,
        # drop the metadata and chapters of the source file ...
        "-map_metadata", "-1",
        # ... and tag the output with data taken from the post instead
        "-metadata", "artist={}".format(metadata["creator"]),
        "-metadata", "title={}".format(metadata["url"]),
        "-metadata", "creation_time={}".format(metadata["date"]),
        "-map_chapters", "-1",
        "-ac", "2",
        "-af", "loudnorm=dual_mono=true",
        "-b:a", bitrate,
        "-y", outfile,
    ]

    try:
        result = subprocess.run(command)
    except OSError as e:
        # e.g. ffmpeg is not installed or not executable
        print("could not run ffmpeg: {}".format(e))
        return

    if result.returncode != 0:
        print("ffmpeg failed with exit status {} for {}".format(
            result.returncode, infile))
        # A partial output file would be mistaken for a finished transcode
        # on the next run (and picked up by the playlist), so remove it.
        if os.path.exists(outfile):
            os.remove(outfile)

def grab_media(path, url):
    """Download the file at ``url`` into ``path`` unless it is already there.

    The local file name is the last segment of the URL's path component.
    The download is written to a temporary ``.part`` file and only renamed
    to its final name once it completed, so an interrupted download is
    retried on the next run instead of being mistaken for a finished one.

    Args:
        path: Existing directory to store the file in.
        url: URL of the media file.

    Returns:
        The file name (relative to ``path``) if the file is present locally
        after the call, otherwise ``None`` (non-OK HTTP status or a network
        problem).
    """
    media_item = urlparse(url).path.split('/')[-1]
    target = os.path.join(path, media_item)

    if os.path.exists(target):
        return media_item

    headers = {
        'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
        'From': 'post.lurk.org/@lurk'  # This is another valid field
    }
    partial = target + ".part"

    try:
        # The context manager releases the connection even on errors; the
        # timeout keeps a stalled server from hanging the script forever.
        with requests.get(url, headers=headers, stream=True,
                          timeout=60) as response:
            if not response.ok:
                return None
            with open(partial, 'wb') as media_file:
                # iter_content undoes any transfer compression and reports
                # mid-transfer failures as requests exceptions.
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    media_file.write(chunk)
        os.replace(partial, target)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.ChunkedEncodingError) as e:
        # maybe transient network issues
        print(e)
        sleep(60)
        return None
    finally:
        # never leave an incomplete download behind
        if os.path.exists(partial):
            os.remove(partial)

    print('Downloaded media {} from {}'.format(media_item, urlparse(url).netloc))
    return media_item

# This pages through the whole looptober tag timeline and collects the JSON
# of every page in 'data' (one list entry per page).
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []
while there_is_more:
    print("downloading", url)
    r = requests.get(url)
    print("response status: ", r.status_code)
    if not (r.ok and r.content):
        # A failed request or an empty body: stop here. Looping on would
        # request the very same url again and again.
        break

    data.append(r.json())
    print("amount of pages:", len(data))
    sleep(0.5)  # be gentle with the server

    # The Link header may be absent or may only carry a "prev" relation, so
    # look the "next" relation up defensively.
    next_link = r.links.get("next")
    if next_link:
        url = next_link["url"]
        print("found next url", url)
    else:
        print("no more data")
        there_is_more = False

# This walks over all collected pages, keeps the posts of the configured year
# that carry media attachments and stores a few valuable fields of each in
# looooops.
# NOTE(review): any attachment qualifies here, not only sound files - confirm
# whether filtering on the attachment type is wanted.
looooops = []
for collection in data:
    for post in collection:
        attachments = post["media_attachments"]
        if not attachments:  # we only take entries that carry attachments
            continue

        # fromisoformat() on older Pythons rejects a trailing "Z", so spell
        # the UTC offset out. The timestamp must stay timezone-aware:
        # astimezone() treats a naive value as *local* time, which would
        # shift it by the host's UTC offset.
        stamp = post["created_at"]
        if stamp.endswith("Z"):
            stamp = stamp[:-1] + "+00:00"
        creation_date = datetime.datetime.fromisoformat(stamp)
        if creation_date.tzinfo is None:
            # no offset given at all: take the timestamp as UTC
            creation_date = creation_date.replace(tzinfo=datetime.timezone.utc)
        creation_date = creation_date.astimezone(datetime.timezone.utc)

        if creation_date.strftime('%Y') != year:  # only entries from this year
            continue

        username = post["account"]["username"]
        looooops.append({
            "url": post["url"],
            "description": post["content"],
            "audio": attachments,
            "date": post["created_at"],
            "id": post["id"],
            "creator": username,
        })
        print("found post by {} with {} looops".format(
            username, len(attachments)))

# Fetch (and transcode) every attachment of every collected post; each post
# gets its own "<creator>_<id>" directory below output_dir.
for loop in looooops:
    creator = loop['creator']
    path = os.path.join(output_dir, "{}_{}".format(creator, loop['id']))
    os.makedirs(path, exist_ok=True)

    print("\n")
    print("Downloading looops by ***{}***".format(creator))
    for attachment in loop['audio']:
        # use remote_url when it is set, otherwise fall back to url
        url = attachment['remote_url'] or attachment['url']

        media_item = grab_media(path, url)
        if not media_item:
            continue
        transcode_media(path, media_item, loop)

# Once everything is done, (re)generate the playlist and ask ezstream to
# reload it.
# Putting output_dir into the shell command is not an injection
# vulnerability, as output_dir is under our control.
playlist_command = (
    'find {} -iname "*opus" > playlist_loooooops.m3u'
    '&& kill -s HUP `pidof ezstream`'
).format(output_dir)
os.system(playlist_command)