# Last Updated: January 27, 2021
# ·
# 582
# · kalinin84

# BeautifulSoup

import re
import json
import requests
from bs4 import BeautifulSoup


def load_json(filename):
    """Return the parsed JSON content of the UTF-8 text file *filename*.

    The file is opened read-only and closed before the parsed value is
    handed back to the caller.
    """
    with open(filename, mode='r', encoding='utf-8') as source:
        parsed = json.load(source)
    return parsed


def save_json(filename, data):
    """Write *data* to *filename* as UTF-8 encoded JSON.

    The file is opened in write mode, so existing content is replaced.
    Output is indented by four spaces with keys sorted, and non-ASCII
    characters are written as-is rather than as ``\\uXXXX`` escapes.
    """
    dump_options = {'indent': 4, 'sort_keys': True, 'ensure_ascii': False}
    with open(filename, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)


def fetch(url, headers, timeout=30):
    """Issue an HTTP GET request to *url* and return the raw response body.

    Args:
        url: Address to request.
        headers: HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded unchanged to ``requests.get``.

    Returns:
        The response body as bytes (``response.content``). The HTTP status
        code is not inspected, so the body of an error response is returned
        like any other.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout*.
    """
    # requests applies no timeout by default, so a stalled server would
    # block the caller forever; always pass one explicitly.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.content


def parse_html(html):
    """Extract the title, visible text and link targets from an HTML document.

    Args:
        html: Markup to parse; it is handed to ``BeautifulSoup`` with the
            built-in ``'html.parser'`` backend.

    Returns:
        A dict with three keys:

        * ``'title'``: the ``.string`` of the document's ``<title>`` tag, or
          ``None`` when the document has no ``<title>`` tag.
        * ``'text'``: the result of ``soup.get_text()``.
        * ``'links'``: one entry per ``<a>`` tag, in document order, holding
          its ``href`` attribute (``None`` for anchors without one).
    """
    soup = BeautifulSoup(html, 'html.parser')

    # soup.title is None for documents without a <title>; guard the lookup
    # so such documents yield a None title instead of an AttributeError.
    title_tag = soup.title
    title = title_tag.string if title_tag is not None else None

    links = [anchor.get('href') for anchor in soup.find_all('a')]
    return {'title': title, 'text': soup.get_text(), 'links': links}