Get free proxy list (python)

Get the list of free proxies using https://free-proxy-list.net.

There are two scripts you can use to obtain a list of proxies as IP:port pairs from free-proxy-list.net:

The urllib script returns around 164 proxies, because it keeps only the HTTPS-capable entries, while the requests/BeautifulSoup script returns the entire list of 300 proxies.

Get proxy list with urllib

First we need to import some libraries:

from bs4 import BeautifulSoup as b
import urllib.request as urllib

Define the function to get the proxies:

def get_proxies():
    site = 'https://free-proxy-list.net/'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.Request(site, headers=hdr)
    url = urllib.urlopen(req).read()
    html = b(url, "lxml")
    rows = html.find_all("tr")
    proxies = []
    for row in rows:
        cols = [ele.text for ele in row.find_all('td')]
        try:
            ipaddr = cols[0]                # IP address
            portNum = cols[1]               # port number
            proxy = ipaddr + ":" + portNum  # concatenating IP and port
            https = cols[6]                 # Https column: "yes" / "no"
            if https == "no":
                pass  # skip proxies that do not support HTTPS
            else:
                # if portNum == '80' or portNum == '8080':  # restrict ports if needed
                proxies.append(proxy)
        except IndexError:
            pass  # header rows or rows without <td> cells
    return proxies

Let’s call the function to get the proxy list:

proxies = get_proxies()
print('Number of proxies:', len(proxies))
print(proxies)
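
To quickly check that one of these proxies actually works, here is a minimal sketch that routes a request through the first entry using urllib's ProxyHandler. The httpbin.org/ip echo target and the timeout value are my own example choices, not part of the original script:

handler = urllib.ProxyHandler({'http': 'http://' + proxies[0],
                               'https': 'http://' + proxies[0]})
opener = urllib.build_opener(handler)
try:
    # httpbin.org/ip echoes the IP it sees (example target, not from the original post)
    print(opener.open('https://httpbin.org/ip', timeout=10).read())
except Exception as e:
    print('Proxy failed:', e)  # free proxies are often dead; try another entry

Keep in mind that free proxies go offline frequently, so expect many entries in the list to fail a check like this.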

The entire code can be downloaded from https://raw.githubusercontent.com/robert4digital/get-free-proxies/master/getProxies_utillib.py or copied from the text below:

# Get proxies as a list using urllib

# based on
# https://www.scrapehero.com/how-to-rotate-proxies-and-ip-addresses-using-python-3/
# https://www.scrapehero.com/how-to-fake-and-rotate-user-agents-using-python-3/

from bs4 import BeautifulSoup as b
import urllib.request as urllib

def get_proxies():
    site = 'https://free-proxy-list.net/'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.Request(site, headers=hdr)  # build the request with headers
    url = urllib.urlopen(req).read()         # open the URL and read the source code
    html = b(url, "lxml")                    # parse the source code
    rows = html.find_all("tr")               # find all rows of the proxy table
    proxies = []
    for row in rows:
        cols = [ele.text for ele in row.find_all('td')]
        try:
            ipaddr = cols[0]                # IP address: first element of cols
            portNum = cols[1]               # port number: second element of cols
            proxy = ipaddr + ":" + portNum  # concatenate IP and port
            https = cols[6]                 # Https column: "yes" / "no"
            if https == "no":
                pass  # skip proxies that do not support HTTPS
            else:
                # use specific ports only if you have port filters in your organisation
                # if portNum == '80' or portNum == '8080':
                proxies.append(proxy)
        except IndexError:
            pass  # header rows or rows without <td> cells
    return proxies

###################################################

if __name__ == "__main__":
    proxies = get_proxies()
    print('Number of proxies:', len(proxies))
    print(proxies)

Get proxy list with BeautifulSoup

First let’s import all the libraries we need:

import requests
from bs4 import BeautifulSoup

Let’s create a function to get the list using the requests and BeautifulSoup Python packages:

# Get the list of free proxies as proxyIP:port
def get_300proxies():
    # list of proxies to be filled
    proxies = []

    # get the webpage content
    res = requests.get('https://free-proxy-list.net/',
                       headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(res.text, "lxml")
    for row in soup.select("tbody tr"):
        # join the first two cells into proxyIP:port
        proxy = ':'.join([item.text for item in row.select("td")[:2]])
        proxies.append(proxy)  # append proxy to the list
    return proxies  # return the list of proxyIP:port items

Run the function to get the list:

proxies = get_300proxies()
print('Number of proxies:', len(proxies))
print(proxies)
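
As a quick sanity check, the sketch below picks a random proxy from the list and sends a request through it with requests. The httpbin.org/ip target and the timeout are my own example choices, not part of the original script:

import random

proxy = random.choice(proxies)
try:
    # httpbin.org/ip echoes the caller's IP (example target, not from the original post)
    res = requests.get('https://httpbin.org/ip',
                       proxies={'http': 'http://' + proxy,
                                'https': 'http://' + proxy},
                       timeout=10)
    print(res.text)
except requests.exceptions.RequestException as e:
    print('Proxy failed:', e)  # free proxies die quickly; pick another and retry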

The entire code can be copied from below or downloaded from https://raw.githubusercontent.com/robert4digital/get-free-proxies/master/getProxies_BeautifulSoup.py:

import requests
from bs4 import BeautifulSoup

# Get the list of free proxies as proxyIP:port
def get_300proxies():
    # list of proxies to be filled
    proxies = []

    # get the webpage content
    res = requests.get('https://free-proxy-list.net/', headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(res.text, "lxml")
    for row in soup.select("tbody tr"):
        # join the first two cells into proxyIP:port
        proxy = ':'.join([item.text for item in row.select("td")[:2]])
        proxies.append(proxy)
    return proxies  # return the list of proxyIP:port items

###################################################

if __name__ == "__main__":
    proxies = get_300proxies()
    print('Number of proxies:', len(proxies))
    print(proxies)

Acknowledgement

Enjoy programming! @RD
