# Last Updated: January 27, 2021
# · kalinin84


import re
import json
import requests
from bs4 import BeautifulSoup

def load_json(filename):
    """Return the parsed JSON content of the UTF-8 encoded file *filename*."""
    with open(filename, encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)

def save_json(filename, data):
    """Serialize *data* as JSON into the UTF-8 encoded file *filename*.

    The output is indented by four spaces, object keys are sorted, and
    non-ASCII characters are written as-is rather than escaped.
    """
    dump_options = {'indent': 4, 'sort_keys': True, 'ensure_ascii': False}
    with open(filename, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)

def fetch(url, headers, timeout=30):
    """Download *url* with an HTTP GET request and return the raw body.

    Args:
        url: Address to request.
        headers: HTTP headers sent along with the request.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Without one, an unresponsive server would block
            the caller forever. Pass ``None`` to wait indefinitely, which
            was the behaviour before this parameter existed.

    Returns:
        The response body as bytes (``response.content``). The HTTP status
        code is not checked, so error pages are returned like any other body.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.content

def parse_html(html):
    """Extract the title, visible text and link targets from an HTML document.

    Args:
        html: HTML markup (text or bytes) handed to ``BeautifulSoup``.

    Returns:
        A dict with three keys:
            ``'title'``: the string of the ``<title>`` element, or ``None``
                when the document has no ``<title>``.
            ``'text'``: the result of ``soup.get_text()``.
            ``'links'``: list of ``href`` values of the ``<a>`` elements,
                in document order.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Documents without a <title> make soup.title None; guard so that case
    # yields None instead of raising AttributeError.
    title_tag = soup.title
    title = title_tag.string if title_tag is not None else None

    text = soup.get_text()

    # NOTE(review): the original loop body was missing (a syntax error), so
    # collecting href values is the presumed intent — confirm against callers.
    # Anchors without an href attribute are skipped.
    links = [anchor['href'] for anchor in soup.find_all('a', href=True)]

    return {'title': title, 'text': text, 'links': links}