Estructura de archivos
/website/website/tiktok.html
/TiktokBrowser.py
/utils.py
tiktok.html
Se importará el código acrawler.js:
<script src="https://www.tiktok.com/acrawler/acrawler.js"></script>
utils.py
Aquí tendremos el web list de TikTok, es decir, la lista de peticiones que se pueden hacer para un paginado.
def python_list2_web_list(data):
    """Serialize an iterable of strings as a JavaScript array literal.

    The result is meant to be pasted into JavaScript source, e.g.
    ``["/api/user/detail", "/api/item/detail"]``.

    Args:
        data: Iterable of strings to serialize.

    Returns:
        The array literal as a string. An empty iterable yields ``[]``.
    """
    # Local import keeps this snippet self-contained.
    import json

    # json.dumps escapes quotes, backslashes and control characters, which the
    # previous manual '", "'.join(...) did not. It also emits [] for empty
    # input instead of [""] (an array holding one empty string).
    return json.dumps(list(data))
TiktokBrowser.py
En esta clase se realiza el instanciamiento del Browser.
__author__ = "hcushi asecas"
__copyright__ = "Copyright 2021"

import asyncio
import json
import os
import shutil

from pyppeteer import launch

try:
    from .utils import python_list2_web_list
except ImportError:
    # Loaded as a top-level module (e.g. imported from a main.py that sits
    # next to this file) rather than as part of a package.
    from utils import python_list2_web_list


class TikTokBrowser:
    """Launch a Chromium instance through pyppeteer to sign TikTok URLs.

    The browser loads a local dummy page (``website/tiktok.html``) and calls
    ``window.byted_acrawler.sign`` on it to obtain a signature for a URL.
    """

    def __init__(self, user_agent):
        """Store the launch configuration.

        Args:
            user_agent: User-agent string applied both as a Chromium flag and
                on the page itself.
        """
        self.userAgent = user_agent
        self.args = [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-infobars",
            "--window-position=0,0",
            # Spelling fixed ("certifcate" -> "certificate"); Chromium ignores
            # switches it does not recognise.
            "--ignore-certificate-errors",
            "--ignore-certificate-errors-spki-list",
            "--user-agent=" + self.userAgent,
            '--headless',
            "--disable-gpu",
            "--single-process",
            "--no-zygote",
            "--disable-dev-shm-usage",
        ]
        self.options = {
            'args': self.args,
            'headless': False,
            'autoClose': False,
            'ignoreHTTPSErrors': True,
            'userDataDir': "./tmp",
            'handleSIGINT': False,
            'handleSIGTERM': False,
            'handleSIGHUP': False,
        }
        # Request paths handed to byted_acrawler.init as enablePathList.
        self.api_list = [
            "/api/user/detail",
            "/api/user/list/",
            "/api/music/detail",
            "/api/item/detail",
            "/api/challenge/detail/",
            "/share/item/list",
            "/api/item_list/",
            "/api/comment/list/",
            "/api/comment/list/reply/",
            "/api/discover/*",
            "/api/commit/follow/user/",
            "/api/recommend/user/",
            "/api/impression/write/",
            "/share/item/explore/list",
            "/api/commit/item/digg/",
            "/node/share/*",
            "/discover/render/*",
        ]
        # abspath keeps the file:// URL valid even if __file__ is relative.
        parent_folder = os.path.dirname(os.path.abspath(__file__))
        self.tiktok_dummy_page = "file://" + os.path.join(parent_folder, "website", "tiktok.html")

    def clean_tmp(self):
        """Delete the ``./tmp`` browser profile directory if it exists."""
        tmp = "./tmp"
        # ignore_errors: a missing directory means there is nothing to clean.
        shutil.rmtree(tmp, ignore_errors=True)

    def fetch_auth_params(self, url, language='en'):
        """Synchronously compute the signature for ``url``.

        Args:
            url: URL to sign.
            language: Value sent in the ``Accept-Language`` header.

        Returns:
            Whatever ``window.byted_acrawler.sign`` returns for ``url``.

        Raises:
            RuntimeError: Propagated from the event loop (for example when a
                loop is already running) instead of silently returning None.
        """
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No event loop is set for this thread (typical outside the main
            # thread), so create and register one.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop.run_until_complete(self.async_fetch_auth_params(url, language))

    async def async_fetch_auth_params(self, url, language):
        """Open the dummy page in a fresh browser and sign ``url`` on it.

        Args:
            url: URL to sign.
            language: Value sent in the ``Accept-Language`` header.

        Returns:
            Whatever ``window.byted_acrawler.sign`` returns for ``url``.
        """
        # json.dumps yields a correctly quoted and escaped JavaScript string
        # literal, so quotes or backslashes in the URL cannot break or inject
        # code into the evaluated script.
        sign_script = (
            "() => {"
            "var init_token = window.byted_acrawler.init({"
            "aid: 1988,dfp: !1,boe: !1,intercept: !0,"
            "enablePathList: " + python_list2_web_list(self.api_list) +
            "});"
            "var token = window.byted_acrawler.sign({url: " + json.dumps(url) + "});"
            "return token;}"
        )
        browser = await launch(self.options)
        try:
            page = await browser.newPage()
            await page.setCacheEnabled(False)
            await page.evaluateOnNewDocument("""() => {delete navigator.__proto__.webdriver;}""")
            await page.setUserAgent(self.userAgent)
            await page.setExtraHTTPHeaders({'Accept-Language': language})
            await page.goto(self.tiktok_dummy_page, {'waitUntil': "load"})
            signature = await page.evaluate(sign_script)
        finally:
            # Always shut the browser down, even when a step above fails, so
            # no Chromium process is left behind.
            await browser.close()
        return signature
Cómo usar la clase
Para usarla, se debe crear un main.py (o un archivo con el nombre de su gusto).
Ahí se importará la clase TikTokBrowser, se definirá el user-agent y se creará la URL de TikTok para el
paginado. Para cada caso la URL es diferente y los parámetros son distintos, por lo cual en el
siguiente código se debe notar que la URL no está completa: le hacen falta los parámetros de paginado. Para conocer dichos parámetros, investigue las peticiones de TikTok en cualquier navegador: mediante el inspector de código se podrá acceder a la pestaña Network y buscar la petición encargada de la paginación.
import requests

# The module name matches the documented file name, TiktokBrowser.py.
from TiktokBrowser import TikTokBrowser

# Adjacent string literals are joined by Python; the user agent was previously
# split across two lines inside a single literal, which is a syntax error.
USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
)

tiktok_browser = TikTokBrowser(USER_AGENT)

# The pagination parameters required by the request must be added to this URL
# before it is signed.
url_tiktok = "https://www.tiktok.com/api/challenge/item_list/"
_signature = tiktok_browser.fetch_auth_params(url_tiktok)

# Append the token as the `_signature` query parameter, starting the query
# string with "?" when the URL has none yet.
# NOTE(review): the parameter name `_signature` is assumed from TikTok's web
# requests; confirm it in the browser's network inspector.
separator = "&" if "?" in url_tiktok else "?"
url_tiktok = url_tiktok + separator + "_signature=" + _signature

# requests.get takes `headers` (plural); `header` raises TypeError.
content = requests.get(url_tiktok, headers={"user-agent": USER_AGENT})