Tool: A tool for listing all downloadable files for an artist on kemono.su

thegodfather2049

New Member
Dec 30, 2022
3
0
I wrote a small Python 3 script to list every downloadable file (that isn't an image) on a creator's page on kemono.su.

This is useful for finding game releases posted as ZIP files, like in the case of Revenge of Yagiri.

F95Zone doesn't allow uploading Python scripts so here is the full source code instead:

Python:
from urllib.request import urlopen, Request
from urllib.parse import urlparse, urlencode
from argparse import ArgumentParser
from html.parser import HTMLParser
from time import time, sleep


"""
Lists every download for a creator on https://kemono.su
"""


def find_attribute(attrs, attr):
    """Return the first ``(name, value)`` pair in *attrs* whose name is *attr*.

    *attrs* is the attribute list handed to HTMLParser callbacks.
    Returns ``None`` when no attribute with that name is present.
    """
    return next((pair for pair in attrs if pair[0] == attr), None)


class PostParser(HTMLParser):
    """Prints every attachment link found on a single post page.

    Links are taken from <a> tags inside <li> items of the
    <ul class="post__attachments"> list; relative hrefs get the site
    origin prepended before printing.
    """

    # parsing state: inside the attachments list / inside one of its <li>s
    process_downloads = False
    is_li = False

    def __init__(self, site):
        super().__init__()
        self.site = site

    def handle_starttag(self, tag, attrs):
        if tag == 'ul':
            klass = find_attribute(attrs, 'class')
            if klass is not None and 'post__attachments' in klass[1]:
                self.process_downloads = True
            return
        if tag == 'li':
            if self.process_downloads:
                self.is_li = True
            return
        if tag == 'a' and self.is_li:
            href = find_attribute(attrs, 'href')
            if href is not None:
                target = href[1]
                # a link with no netloc is relative -> prefix the site origin
                print(target if urlparse(target).netloc else f'{self.site}{target}')

    def handle_endtag(self, tag):
        if tag == 'li' and self.is_li:
            self.is_li = False
        elif tag == 'ul' and self.process_downloads:
            self.process_downloads = False

    def handle_data(self, data):
        # Text nodes carry nothing we need on a post page.
        pass


class PageParser(HTMLParser):
    """Walks one creator result page, fetching each linked post.

    Every <a href> inside an <article> element is treated as a post link;
    the post page is fetched (rate-limited to one request per 0.2 s) and
    handed to a PostParser, which prints the attachment links.
    """

    def __init__(self, site):
        super().__init__()
        self.site = site
        self.post_parser = PostParser(site)
        # instance state (previously class attributes; prev_time was
        # evaluated once at import time and shared across instances)
        self.process_post = False  # True while inside an <article>
        self.prev_time = time()    # timestamp of the last HTTP request

    def handle_starttag(self, tag, attrs):
        if tag == 'article':
            self.process_post = True
        elif self.process_post and tag == 'a':
            link = find_attribute(attrs, 'href')
            if link is None:
                return
            # Prefix relative hrefs with the site origin; leave absolute
            # URLs untouched (consistent with PostParser's handling —
            # previously an absolute href would have been mangled).
            href = link[1]
            url = href if urlparse(href).netloc else self.site + href
            # polite delay of at least 0.2 s between requests
            to_sleep = 0.2 - abs(time() - self.prev_time)
            if to_sleep > 0:
                sleep(to_sleep)
            self.prev_time = time()

            with urlopen(Request(url)) as resp:
                if resp.status == 200:
                    self.post_parser.feed(resp.read().decode('utf-8'))

    def handle_endtag(self, tag):
        if tag == 'article':
            self.process_post = False

    def handle_data(self, data):
        # Nothing to do for text nodes (a vestigial 'Showing' check copied
        # from CreatorParser was removed — it was dead code).
        pass


class CreatorParser(HTMLParser):
    """Finds the total post count on a creator page and walks every result page.

    The creator page contains 'Showing X - Y of N' text; once N is known,
    each 50-post page (?o=offset) is fetched (rate-limited) and fed to a
    PageParser.
    """

    def __init__(self, site):
        super().__init__()
        self.site = site
        self.post_count = None   # total posts, parsed from the 'Showing' text
        self.prev_time = time()  # timestamp of the last HTTP request

    def handle_data(self, data):
        if self.post_count is None and 'Showing' in data:
            # e.g. "Showing 1 - 50 of 123" -> 123.  Store it on self so the
            # guard above actually trips (the original kept it in a local,
            # so every 'Showing' occurrence would have re-crawled).
            self.post_count = int(data.split(' ')[-1])
            # ceil(post_count / 50) pages via integer ceiling division;
            # the original passed a float (post_count / 50) to range(),
            # which raises TypeError.
            for page in range(-(-self.post_count // 50)):
                params = urlencode({'o': page * 50})
                req = Request(f'{self.site}?{params}')
                # polite delay of at least 0.2 s between page fetches
                to_sleep = 0.2 - abs(time() - self.prev_time)
                if to_sleep > 0:
                    sleep(to_sleep)
                self.prev_time = time()

                with urlopen(req) as resp:
                    if resp.status != 200:
                        continue
                    PageParser(self.site).feed(resp.read().decode('utf-8'))

    def handle_starttag(self, tag, attrs):
        pass

    def handle_endtag(self, tag):
        pass


def main():
    """Entry point: print every attachment link for the creator URL on argv.

    Takes a single positional argument (the creator page URL), fetches it,
    and feeds the HTML to a PageParser.  Exits with status 1 when the page
    does not respond with HTTP 200.
    """
    parser = ArgumentParser()
    parser.add_argument('artist_url', nargs=1, type=str)
    args = parser.parse_args()

    artist_url = args.artist_url[0]
    # derive the site origin (scheme://host) for resolving relative links
    url = urlparse(artist_url)
    site = f'{url.scheme}://{url.netloc}'

    req = Request(artist_url)
    with urlopen(req) as resp:
        if resp.status != 200:
            # raise SystemExit rather than calling the site-provided exit()
            # builtin, which is absent when Python runs without site.py
            raise SystemExit(1)
        PageParser(site).feed(resp.read().decode('utf-8'))


if __name__ == '__main__':
    main()
The code uses no external modules, only the ones in the Python 3 standard library. Enjoy!