# banlanpy script: save CSDN articles as PDF/HTML/Markdown (originally shared as a forum post).

import requests
from bs4 import BeautifulSoup
import random
import html2text
import os
import re
import pdfkit
import time
import logging
import json

# Pool of User-Agent header strings. Per the original author's note, one entry
# is meant to be picked at random for every request so that frequent access is
# less likely to get the client blocked.
USER_AGENT_LIST = [
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/98.0.4758.82 Safari/537.36"
    ),
    # ... other user agents ...
]

class CSDNSpider:
    """Placeholder for the CSDN spider class.

    The class body is elided in this excerpt. Presumably it holds the
    download/convert logic that the file header describes (saving CSDN
    articles as PDF/HTML/Markdown) -- TODO confirm against the full script.
    """

    # The elision marker must be indented to form the class suite; at
    # column 0 the class statement has no body and the module fails to
    # parse with an IndentationError.
    ...