Python singleton class tutorial

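This example builds a small single-site image crawler to show the singleton pattern in practice: CrawlerSingleton overrides __new__ so that every instantiation returns the same object, and that one instance carries the shared crawl state (url_queue, visited_url, image_downloaded) used by both the crawling function and the two downloader threads.
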
import os
import threading
import urllib.request
from urllib.parse import urlparse, urljoin

import httplib2
from bs4 import BeautifulSoup

# Seed URL for the crawl. The original listing never defines main_url, so this
# value is only an assumed placeholder -- point it at a site you may crawl.
main_url = 'https://www.python.org'

class CrawlerSingleton(object):
    """Classic singleton: every call to CrawlerSingleton() returns the same instance."""

    def __new__(cls):
        # Create the instance on first use; hand back the cached one afterwards.
        if not hasattr(cls, 'instance'):
            cls.instance = super(CrawlerSingleton, cls).__new__(cls)
        return cls.instance

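Because __new__ hands back the cached object, every construction returns the identical instance, and state attached through one reference is visible through all of them. A quick check, separate from the crawler itself (the URL is illustrative only):

a = CrawlerSingleton()
b = CrawlerSingleton()
print(a is b)                           # True: one shared object
a.url_queue = ['https://example.com']   # assumed URL, for illustration
print(b.url_queue)                      # ['https://example.com']
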
def navigate_site(max_links=5):
    parser_crawlersingleton = CrawlerSingleton()
    while parser_crawlersingleton.url_queue:
        # Stop after enough pages have been visited.
        if len(parser_crawlersingleton.visited_url) == max_links:
            return
        url = parser_crawlersingleton.url_queue.pop()
        http = httplib2.Http()
        try:
            # httplib2 returns (response headers, body); the body feeds the parser.
            response, content = http.request(url)
        except Exception:
            continue
        parser_crawlersingleton.visited_url.add(url)
        print(url)
        bs = BeautifulSoup(content, "html.parser")
        for link in bs.find_all('a'):
            link_url = link.get('href')
            if not link_url:
                continue
            parsed = urlparse(link_url)
            # Skip absolute links that point off the seed site.
            # (parsed_url is the module-level parse of main_url, set in __main__.)
            if parsed.netloc and parsed.netloc != parsed_url.netloc:
                continue
            # Rebuild the link against the seed's scheme and host.
            scheme = parsed_url.scheme
            netloc = parsed.netloc or parsed_url.netloc
            path = parsed.path
            link_url = scheme + '://' + netloc + path
            if link_url in parser_crawlersingleton.visited_url:
                continue
            parser_crawlersingleton.url_queue = [link_url] + \
                parser_crawlersingleton.url_queue

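The normalization step above is easy to check in isolation. A minimal sketch with an assumed seed of https://example.com (the URLs here are illustrative only):

from urllib.parse import urlparse

seed = urlparse('https://example.com/docs')   # stands in for parsed_url
link = urlparse('/about')                     # a relative href found on a page
# Relative links carry no netloc, so the same-site filter keeps them, and the
# seed's scheme and host are filled in when the URL is rebuilt:
rebuilt = seed.scheme + '://' + (link.netloc or seed.netloc) + link.path
print(rebuilt)   # https://example.com/about
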
class ParallelDownloader(threading.Thread):
    """Worker thread that pulls crawled pages from the shared singleton and saves their images."""

    def __init__(self, thread_id, name, counter):
        threading.Thread.__init__(self)
        self.thread_id = thread_id
        self.name = name
        self.counter = counter

    def run(self):
        print('Starting thread', self.name)
        download_images(self.name)
        print('Finished thread', self.name)

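Note that the threads are launched with start(), which executes run() on a new thread of control; calling run() directly would execute the download on the caller's thread, and the two workers would no longer overlap.
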
def download_images(thread_name):
    singleton = CrawlerSingleton()
    # Both workers drain the same visited_url set on the shared singleton.
    while singleton.visited_url:
        try:
            url = singleton.visited_url.pop()
        except KeyError:
            # The other thread emptied the set between the check and the pop.
            break
        http = httplib2.Http()
        print(thread_name, 'Downloading images from', url)
        try:
            response, content = http.request(url)
        except Exception:
            continue
        bs = BeautifulSoup(content, "html.parser")
        for image in bs.find_all('img'):
            src = image.get('src')
            if not src:
                continue
            src = urljoin(url, src)
            basename = os.path.basename(src)
            print('basename:', basename)
            if basename != '' and src not in singleton.image_downloaded:
                singleton.image_downloaded.add(src)
                print('Downloading', src)
                urllib.request.urlretrieve(src, os.path.join('images', basename))
        print(thread_name, 'finished downloading images from', url)

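Popping from a plain set works here because CPython's GIL keeps each pop atomic, but if you extend the example, queue.Queue is the safer hand-off structure between threads. A minimal sketch of the same two-worker shape (the URLs and names are illustrative only):

import queue
import threading

url_queue = queue.Queue()
for u in ('https://example.com/a', 'https://example.com/b'):
    url_queue.put(u)

def worker(name):
    while True:
        try:
            url = url_queue.get_nowait()   # raises queue.Empty once drained
        except queue.Empty:
            return
        print(name, 'processing', url)

threads = [threading.Thread(target=worker, args=('Thread-%d' % n,)) for n in (1, 2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
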
def main():
    crwSingltn = CrawlerSingleton()
    # All shared crawl state is attached to the one singleton instance.
    crwSingltn.url_queue = [main_url]
    crwSingltn.visited_url = set()
    crwSingltn.image_downloaded = set()
    navigate_site()
    if not os.path.exists('images'):
        os.makedirs('images')
    thread1 = ParallelDownloader(1, "Thread-1", 1)
    thread2 = ParallelDownloader(2, "Thread-2", 2)
    thread1.start()
    thread2.start()
    # Wait for both workers to finish before returning.
    thread1.join()
    thread2.join()

if __name__ == "__main__":
    # navigate_site() reads this module-level global when filtering links.
    parsed_url = urlparse(main_url)
    main()
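
When run, the script prints each visited URL as navigate_site() crawls up to five pages of the seed site, then the two ParallelDownloader threads interleave their progress messages while saving images into the images/ directory; because both threads hold the same CrawlerSingleton, the shared image_downloaded set lets each worker skip images the other has already claimed.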