commit 4dfeed8311e77525f61f2df5a563c42da170ab64
parent 8dfa4912545dc6180f5c62be0dd642f72753ed66
Author: rra <rscmbbng@riseup.net>
Date: Wed Oct 5 13:55:50 +0200
initial commit, grabs all possible loops
Diffstat:1 file changed, 62 insertions(+), 0 deletions(-)
diff --git a/download_loooooops.py b/download_loooooops.py
@@ -0,0 +1,62 @@
+import datetime
+from time import sleep
+
+import requests
+#def download_media(dir, url):
+# remote_url
+# description
+
+
+# Page through the "looptober" tag timeline, collecting each JSON page in 'data'.
+there_is_more = True
+url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
+data = []
+while there_is_more:
+    print("downloading", url)
+    # A timeout keeps the script from hanging forever on a stalled connection.
+    r = requests.get(url, timeout=30)
+    print(r.status_code)
+    if not r.ok:
+        # HTTP error: stop paging; there_is_more is left True.
+        break
+    if r.content:
+        data.append(r.json())
+        print(len(data))
+        sleep(1)  # brief pause between requests
+    # The Link header may lack a "next" relation, so don't index it directly.
+    next_link = r.links.get("next")
+    if next_link:
+        url = next_link["url"]
+        print("found next url", url)
+    else:
+        print("no more data")
+        there_is_more = False
+
+
+# Parse the JSON pages and keep a few useful fields per post in 'looooops'.
+looooops = []
+for collection in data:
+    for i in collection:
+        # Only posts with attachments (NOTE: any attachment type, not
+        # verified to be audio).
+        if not i["media_attachments"]:
+            continue
+        # A trailing "Z" (UTC) is rewritten as "+00:00" so fromisoformat()
+        # yields an aware datetime; dropping it made the value naive local time.
+        creation_date = datetime.datetime.fromisoformat(
+            i["created_at"].replace("Z", "+00:00")
+        ).astimezone(datetime.timezone.utc)
+        if creation_date.year != 2022:  # we only take entries from this year
+            continue
+        looooops.append({
+            "url": i["url"], "description": i["content"],
+            "audio": i["media_attachments"], "date": i["created_at"],
+            "id": i["id"], "creator": i["account"]["username"]})
+        print("found post by {} with {} looops".format(
+            i["account"]["username"], len(i["media_attachments"])))
+
+
+#for l in looooops:
+ # create a folder per l, named id
+ # download the files in media_attachments using the remote_url
+ # find a way to stuff metadata in the file