I’ve had the pleasure of working on Python RPA scripts, focusing on automation and data extraction from both modern and legacy web applications.
Two key takeaways were that automating and extracting data from modern web applications is relatively straightforward, but working with older, more complex applications presented some unique challenges.
- driver.get_cookies(): this function retrieves the current tab's cookies, and we can reuse those same cookies to hit the API endpoint directly (the one that contains the data we need). This shortcuts the UI automation and lets us scrape the data faster and more effectively.
def get_hash_value(customer_id, document_id):
    """Fetch the hashed value for a customer document via the site's API.

    Reuses the authenticated session cookies from the live Selenium
    ``driver`` so the server accepts the request as if it came from the
    browser, avoiding slow UI navigation.

    Args:
        customer_id: Customer identifier, sent as ``CustId``.
        document_id: Document identifier, sent as ``DocumentId``.

    Returns:
        The decoded JSON response body on HTTP 200, otherwise ``None``
        (a diagnostic message is printed on failure).
    """
    # Serialize the browser's cookies into a single Cookie header value.
    # join() avoids the quadratic cost of repeated string concatenation.
    cookies = driver.get_cookies()
    cookie_string = "".join(
        f"{cookie['name']}={cookie['value']}; " for cookie in cookies
    )
    headers = {
        'authority': 'www.example.com',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'cookie': cookie_string,
        # NOTE(review): origin host differs from the request/referer host
        # below -- confirm this is intentional.
        'origin': 'https://qa.devfovea.com',
        'referer': 'https://www.example.com/secret',
        'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }
    # The endpoint expects the real query packed inside a form field.
    data = {'querystring': f'CustId={customer_id}&DocumentId={document_id}'}
    url = 'https://www.example.com/GetHashedValue'
    response = requests.post(url, headers=headers, data=data)
    if response.status_code == 200:
        return response.json()
    print(f"Request failed with status code {response.status_code}")
    return None  # explicit: callers get None on any non-200 response
Enter fullscreen mode Exit fullscreen mode
2. To extract specific data from a table, you can locate the table element itself, then iterate through its rows and cell values.
"""Demo: extract all cell data from an HTML table with Selenium."""
from selenium import webdriver
from selenium.webdriver.common.by import By

# Initialize the WebDriver (ensure chromedriver is on your PATH).
driver = webdriver.Chrome()
try:
    # Navigate to the webpage.
    url = 'https://example.com/page-with-table'  # Replace with the target URL
    driver.get(url)

    # Locate the table by its ID (replace with the actual ID of the table).
    table = driver.find_element(By.ID, 'example-table')

    # Extract every row of the table.
    rows = table.find_elements(By.TAG_NAME, 'tr')

    # Collect the text of each data cell, row by row.
    table_data = []
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        cell_data = [cell.text for cell in cells]
        # Header rows contain only <th> elements, so `cells` is empty
        # for them -- skip to avoid collecting empty lists.
        if cell_data:
            table_data.append(cell_data)

    # Print the extracted data.
    for row in table_data:
        print(row)
finally:
    # Always release the browser, even if navigation or lookup raises.
    driver.quit()
Enter fullscreen mode Exit fullscreen mode
3. The last one, but a useful one: whenever I get stuck writing scripts for an old and shabby website, I use the Selenium IDE browser extension to record my actions and clicks and export them as a Python script. You can find the Selenium IDE extension in the Chrome Web Store.
No comments yet.