Cara Scrape Data Toko yang Terdapat di Tokopedia Menggunakan Python
Pada pembahasan kali ini admin akan menjelaskan cara scrape data toko yang sudah terdaftar di Tokopedia dengan menggunakan Python.
Sebelum masuk pada pengambilan datanya, install terlebih dahulu library yang dibutuhkan.
- Pandas.
$ py -m pip install pandas
- Requests.
$ py -m pip install requests
Lalu kita akan melakukan import library.
import json
import os

import pandas as pd
import requests
Salin URL yang sudah kamu dapat tadi ke dalam Visual Studio Code, lalu ubah beberapa variabelnya seperti di bawah ini.
# GraphQL endpoint that serves a shop's product listing.
url = 'https://gql.tokopedia.com/graphql/ShopProducts'

# Request headers copied from a desktop Chrome session on tokopedia.com.
# 'referer' names the shop page the listing is requested for.
header = {
    'authority': 'gql.tokopedia.com',
    'accept': '*/*',
    'accept-language': 'id-ID,id;q=0.9,en-US;q=0.8,en;q=0.7',
    'content-type': 'application/json',
    'origin': 'https://www.tokopedia.com',
    'referer': 'https://www.tokopedia.com/sentralkomputer-',
    'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'x-device': 'default_v3',
    'x-source': 'tokopedia-lite',
    'x-tkpd-lite-service': 'zeus',
    'x-version': '68ba647',
}

# A browser session cookie is personal and should not be committed to source.
# If the request needs one, copy the 'cookie' header from your own browser
# session into the TOKOPEDIA_COOKIE environment variable before running.
_cookie = os.environ.get('TOKOPEDIA_COOKIE')
if _cookie:
    header['cookie'] = _cookie
query = f'[{{"operationName":"ShopProducts","variables":{{"sid":"12072236","page":1,"perPage":80,"etalaseId":"etalase","sort":1,"user_districtId":"2274","user_cityId":"176","user_lat":"","user_long":""}},"query":"query ShopProducts($sid:String\u0021, $page: Int, $perPage: Int, $keyword: String, $etalaseId: String, $sort: Int, $user_districtId:String, $user_cityId:String, $user_lat:String, $user_long:String) {{\\n GetShopProduct(shopID:$sid, filter:{{page:$page, perPage:$perPage, fkeyword:$keyword, fmenu:$etalaseId, sort:$sort, user_districtId:$user_districtId, user_cityId:$user_cityId, user_lat:$user_lat, user_long:$user_long}}) {{\\n status\\n errors\\n links {{\\n prev\\n next\\n __typename\\n }}\\n data {{\\n name\\n product_url\\n product_id\\n price {{\\n text_idr\\n __typename\\n }}\\n primary_image {{\\n original\\n thumbnail\\n resize300\\n __typename\\n }}\\n flags {{\\n isSold\\n isPreorder\\n isWholesale\\n isWishlist\\n __typename\\n }}\\n campaign {{\\n discounted_percentage\\n original_price_fmt\\n start_date\\n end_date\\n __typename\\n }}\\n label {{\\n color_hex\\n content\\n __typename\\n }}\\n label_groups {{\\n position\\n title\\n type\\n url\\n __typename\\n }}\\n badge {{\\n title\\n image_url\\n __typename\\n }}\\n stats {{\\n reviewCount\\n rating\\n __typename\\n }}\\n category {{\\n id\\n __typename\\n }}\\n __typename\\n }}\\n __typename\\n }}\\n}}\\n"}}]'
# Fetch the shop's product listing and save it as a CSV file.
print('Mulai')
# A timeout keeps the script from hanging forever if the server never answers.
response = requests.post(url, headers=header, data=query, timeout=30)
# Stop with a clear HTTP error instead of failing later while parsing the body.
response.raise_for_status()
# The response is indexed as a list; the product rows sit under
# data -> GetShopProduct -> data of its first element.
products = response.json()[0]['data']['GetShopProduct']['data']
dtFrame = pd.DataFrame(products)
dtFrame.to_csv('data_toko_tokped.csv', encoding='utf-8')
print('Selesai.....')
Setelah itu hasilnya akan keluar dalam bentuk file CSV.