thegodfather2049
New Member
- Dec 30, 2022
- 7
- 3
I wrote a small Python 3 script to list every downloadable file (that isn't an image) on a creator's page on https://kemono.su.
This is useful for finding game releases posted as ZIP files, like in the case of Revenge of Yagiri (
F95Zone doesn't allow uploading Python scripts so here is the full source code instead:
The code uses no external modules, only the ones in the Python 3 standard library. Enjoy!
You must be registered to see the links
This is useful for finding game releases posted as ZIP files, like in the case of Revenge of Yagiri (
You must be registered to see the links
). F95Zone doesn't allow uploading Python scripts so here is the full source code instead:
Python:
from urllib.request import urlopen, Request
from urllib.parse import urlparse, urlencode
from argparse import ArgumentParser
from html.parser import HTMLParser
from time import time, sleep
"""
Lists every download for a creator on https://kemono.su
"""
def find_attribute(attrs, attr):
    """Return the first ``(name, value)`` pair in *attrs* whose name is *attr*.

    *attrs* is the attribute list HTMLParser passes to ``handle_starttag``.
    Returns ``None`` when no attribute with that name is present.
    """
    matches = (pair for pair in attrs if pair[0] == attr)
    return next(matches, None)
class PostParser(HTMLParser):
    """Parses a single post page and prints every attachment URL found.

    Only links inside a ``<ul class="post__attachments">`` list are printed.
    Relative hrefs are prefixed with *site* so output is always a full URL.
    """

    # True while inside the attachments <ul>.
    process_downloads = False
    # True while inside an <li> of that list.
    is_li = False

    def __init__(self, site):
        super().__init__()
        # Scheme+host prefix used to absolutize relative links.
        self.site = site

    def handle_starttag(self, tag, attrs):
        if tag == 'ul':
            class_ = find_attribute(attrs, 'class')
            # HTMLParser reports valueless attributes (e.g. "<ul class>")
            # with a value of None; guard before the substring test, which
            # would otherwise raise TypeError.
            if class_ is not None and class_[1] and 'post__attachments' in class_[1]:
                self.process_downloads = True
        elif self.process_downloads and tag == 'li':
            self.is_li = True
        elif self.is_li and tag == 'a':
            link = find_attribute(attrs, 'href')
            # Skip missing or valueless href attributes.
            if link is not None and link[1]:
                if urlparse(link[1]).netloc == '':
                    # Relative link: make it absolute.
                    print(f'{self.site}{link[1]}')
                else:
                    print(link[1])

    def handle_endtag(self, tag):
        if self.is_li and tag == 'li':
            self.is_li = False
        elif self.process_downloads and tag == 'ul':
            self.process_downloads = False

    def handle_data(self, data):
        # Text content is irrelevant; only tags matter here.
        pass
class PageParser(HTMLParser):
    """Parses a creator listing page and prints the downloads of each post.

    For every ``<article>``, the first link inside it is fetched and the
    resulting post page is handed to a PostParser, which prints the
    attachment URLs. Requests are rate-limited to one per 0.2 seconds.
    """

    # True while inside an <article> element.
    process_post = False
    # Unused; kept for backward compatibility.
    post_count = None

    def __init__(self, site):
        super().__init__()
        self.site = site
        self.post_parser = PostParser(site)
        # Timestamp of the last request, used for rate limiting.  Set per
        # instance: the former class-level default was evaluated once at
        # import time and shared by every instance.
        self.prev_time = time()

    def _throttle(self):
        """Sleep so at least 0.2 s elapses between consecutive requests."""
        to_sleep = 0.2 - (time() - self.prev_time)
        if to_sleep > 0:
            sleep(to_sleep)
        self.prev_time = time()

    def handle_starttag(self, tag, attrs):
        if tag == 'article':
            self.process_post = True
        elif self.process_post and tag == 'a':
            link = find_attribute(attrs, 'href')
            if link is not None:
                req = Request(self.site + link[1])
                self._throttle()
                with urlopen(req) as resp:
                    if resp.status == 200:
                        self.post_parser.feed(resp.read().decode('utf-8'))

    def handle_endtag(self, tag):
        if tag == 'article':
            self.process_post = False

    def handle_data(self, data):
        # Text nodes carry no information for this parser.  (The original
        # body held a dead "if 'Showing' in data: pass" copy-paste leftover
        # from CreatorParser.)
        pass
class CreatorParser(HTMLParser):
    """Parses a creator page, then crawls every listing page of that creator.

    The page contains text like "Showing 1 - 50 of N"; the trailing number N
    is the total post count, used to paginate through the listing with
    ``?o=<offset>`` query parameters (50 posts per page).
    """

    # Listing pages show at most this many posts.
    POSTS_PER_PAGE = 50

    # Total posts found; None until the "Showing ... N" text is seen.
    post_count = None

    def __init__(self, site):
        super().__init__()
        self.site = site
        # Per-instance rate-limit timestamp (the old class-level value was
        # evaluated once at import time and shared by all instances).
        self.prev_time = time()

    def handle_data(self, data):
        if self.post_count is None and 'Showing' in data:
            try:
                count = int(data.split(' ')[-1])
            except ValueError:
                # Some other text happened to contain "Showing".
                return
            # Remember the count so later text chunks don't restart the crawl
            # (the original never assigned self.post_count).
            self.post_count = count
            # Ceiling division: range(count / 50) was a TypeError (float),
            # and plain floor division would drop the partial last page.
            pages = -(-count // self.POSTS_PER_PAGE)
            for page in range(pages):
                params = urlencode({'o': page * self.POSTS_PER_PAGE})
                req = Request(f'{self.site}?{params}')
                # Keep at least 0.2 s between page fetches.
                to_sleep = 0.2 - (time() - self.prev_time)
                if to_sleep > 0:
                    sleep(to_sleep)
                self.prev_time = time()
                with urlopen(req) as resp:
                    if resp.status != 200:
                        continue
                    PageParser(self.site).feed(resp.read().decode('utf-8'))

    def handle_starttag(self, tag, attrs):
        pass

    def handle_endtag(self, tag):
        pass
def main():
    """Fetch the creator page given on the command line and print its downloads.

    Exits with status 1 if the initial page request does not return HTTP 200.
    """
    parser = ArgumentParser()
    parser.add_argument('artist_url', nargs=1, type=str)
    args = parser.parse_args()
    url = urlparse(args.artist_url[0])
    # Scheme + host, used to absolutize relative links found on the pages.
    site = f'{url.scheme}://{url.netloc}'
    req = Request(args.artist_url[0])
    with urlopen(req) as resp:
        if resp.status != 200:
            # exit() is injected by the site module and isn't guaranteed
            # (e.g. under python -S); SystemExit is always available.
            raise SystemExit(1)
        PageParser(site).feed(resp.read().decode('utf-8'))


if __name__ == '__main__':
    main()