#!/bin/sh

#########################################################################
# Scrape Yahoo! - Generates a list of Yahoo! Outbound IPs for Postallow #
# https://github.com/edmundlod/postallow                                #
#########################################################################

# By Steve Cook (https://www.stevecook.net/)
# and Steve Jenkins (https://www.stevejenkins.com/)
# Updated in 2025 by Edmund Lodewijks

# Script version and date of the last revision (informational only).
version="1.2"
lastupdated="22 May 2025"

# Verizon, AOL and Yahoo mail servers have reportedly been merged, so the
# IP addresses scraped from this page cover all of their outgoing mail servers.
yahoo_url="https://senders.yahooinc.com/outbound-mail-servers/"

# NO NEED TO EDIT PAST THIS LINE

#################################################################

# Abort script on error
set -e

# Locate the aggregateCIDR.pl helper, which is used later to collapse the
# scraped IPv4 ranges. `command -v` is the POSIX way to resolve a command
# name; `which` is an external tool that may be absent or may not report
# failure through its exit status.
if ! aggregateCIDR=$(command -v aggregateCIDR.pl); then
    # Diagnostics go to stderr, like the other fatal errors in this script.
    >&2 printf '%s\n' "fatal: unable to locate aggregateCIDR.pl"
    >&2 printf '%s\n' "https://github.com/nabbi/route-summarization"
    exit 1
fi

printf "Scraping Yahoo! outbound IP addresses...\n"

# Pick the config file: an explicit first argument wins, otherwise the first
# existing file among the default locations is used.
if [ -n "$1" ]; then
    config_file="$1"
else
    for config_candidate in \
        "/etc/postallow.conf" \
        "/etc/postallow/postallow.conf" \
        "/usr/local/etc/postallow.conf" \
        "/usr/local/etc/postallow/postallow.conf"
    do
        if [ -f "$config_candidate" ]; then
            config_file="$config_candidate"
            break
        fi
    done
fi

# Read config file options.
# The expansion must be quoted: with no config file selected, an unquoted
# test collapses to `[ -s ]`, which is always true, and the script would then
# try to source an empty path instead of reporting the missing file.
if [ -s "$config_file" ]; then
    printf "\nReading options from %s...\n" "$config_file"
    # The dot builtin looks up slash-less names in $PATH (POSIX), so anchor a
    # bare filename to the current directory, where the -s test found it.
    case "$config_file" in
        */*) config_path="$config_file" ;;
        *)   config_path="./$config_file" ;;
    esac
    . "$config_path"
else
    >&2 printf "%s: Can't find %s. Exiting.\n" "$0" "$config_file"
    exit 1
fi

# Download the Yahoo! sender page into $content using wget or, failing that,
# curl. A browser-like User-Agent string is sent with the request.
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0"

# `command -v` is tested through its exit status, so the result does not
# depend on how the printed path would word-split inside `[ ... ]`.
if command -v wget >/dev/null 2>&1; then
    # Both tools run quietly, so report a failed download explicitly rather
    # than letting `set -e` end the script without any message.
    content=$(wget --user-agent="$user_agent" -q -O - "$yahoo_url") || {
        >&2 printf "ERROR: wget failed to download %s\n" "$yahoo_url"
        exit 1
    }
elif command -v curl >/dev/null 2>&1; then
    content=$(curl -A "$user_agent" -s -L "$yahoo_url") || {
        >&2 printf "ERROR: curl failed to download %s\n" "$yahoo_url"
        exit 1
    }
else
    >&2 printf '%s\n' "wget or curl required."
    exit 1
fi

# Extract text content from table cells only, to avoid matching IPs in HTML/CSS/JS.
# Uses Perl (already required for aggregateCIDR.pl) to handle minified single-line HTML.
# Reads HTML on stdin and prints the text of each <td>...</td> cell, one per line.
extract_td_cells() {
    perl -ne 'while (/<td[^>]*>([^<]+)<\/td>/g) { print "$1\n" }'
}

# One dotted-quad octet, and a complete "address/prefix" CIDR block.
# The prefix length is mandatory and limited to 0-32. The previous pattern
# stacked `?` on an interval, which GNU grep treats as "optional", so a
# dangling "a.b.c.d/" with no prefix length was accepted.
ipv4_octet_re='(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
ipv4_cidr_re="${ipv4_octet_re}\.${ipv4_octet_re}\.${ipv4_octet_re}\.${ipv4_octet_re}/(3[0-2]|[12]?[0-9])"

# Reads text on stdin and prints every IPv4 CIDR block found in it as
# "ip4:<address>/<prefix>", one per line. Prints nothing when none is found.
scrape_ipv4_cidrs() {
    grep -E -o "$ipv4_cidr_re" | sed 's/^/ip4:/'
}

# printf is used instead of echo because some /bin/sh implementations expand
# backslash sequences in echo arguments, which would corrupt the page content.
td_content=$(printf '%s\n' "$content" | extract_td_cells)

# Scrape and format IPv4 addresses from table cells
ipv4_mailers=$(printf '%s\n' "$td_content" | scrape_ipv4_cidrs)

# Guard: an empty scrape must never replace the existing host list, so stop
# here with a diagnostic on stderr and a non-zero exit status.
[ -n "$ipv4_mailers" ] || {
    printf "ERROR: No IPv4 addresses scraped from %s\n" "$yahoo_url" >&2
    printf "The page structure may have changed. %s has NOT been updated.\n" "$yahoo_static_hosts" >&2
    exit 1
}

# The output path normally comes from the sourced config file; stop with a
# clear message if it is missing rather than redirecting to an empty path.
: "${yahoo_static_hosts:?yahoo_static_hosts is not set - check the config file}"

# Aggregate the IPv4 ranges BEFORE touching the output file. Redirecting the
# pipeline straight into the file would truncate it first, so a failing
# aggregateCIDR run would leave an empty host list behind.
if ! ipv4_aggregated=$(printf '%s\n' "$ipv4_mailers" | "$aggregateCIDR" --quiet --spf) \
        || [ -z "$ipv4_aggregated" ]; then
    >&2 printf "ERROR: %s produced no aggregated IPv4 output. %s has NOT been updated.\n" "$aggregateCIDR" "$yahoo_static_hosts"
    exit 1
fi

# Scrape and format IPv6 addresses from table cells
# NOTE(review): entries are tagged "ipv6:" (not the SPF-style "ip6:") and the
# pattern does not capture a trailing "/prefix" - confirm that this is what
# the consumer of this file expects.
ipv6_mailers=$(printf '%s\n' "$td_content" | grep -E -o '(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))' | sed 's/^/ipv6:/')

# Overwrite old Yahoo! mailer list: aggregated IPv4 hosts first, then the
# IPv6 hosts. The IPv6 part is skipped when nothing was found so that no
# blank line ends up in the list.
{
    printf '%s\n' "$ipv4_aggregated"
    if [ -n "$ipv6_mailers" ]; then
        printf '%s\n' "$ipv6_mailers"
    fi
} > "$yahoo_static_hosts"

printf '\nVerizon/AOL/Yahoo! outbound mailers updated.\n'

exit
