Use regex for extracting hosts

This commit is contained in:
/techno 2023-08-06 20:29:07 +00:00 committed by slashtechno
parent df2a598c9e
commit bc94e67635
Signed by: slashtechno
GPG Key ID: 8EC1D9D9286C2B17
5 changed files with 41 additions and 22 deletions

View File

@ -1,2 +1,2 @@
export CLOUDFLARE_ACCOUNT_ID=
export CLOUDFLARE_TOKEN=
CLOUDFLARE_ACCOUNT_ID=
CLOUDFLARE_TOKEN=

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ tmp.py
.venv
dist/
.ruff_cache/
hosts.txt

10
.vscode/settings.json vendored
View File

@ -1,4 +1,10 @@
{
"python.languageServer": "Pylance",
"python.analysis.ignore": [ "*" ] // Ruff is used for linting but Pylance still is useful
"python.languageServer": "Pylance", // Ruff is used for linting but Pylance still is useful for intellisense
"python.analysis.ignore": [
"*"
],
"python.analysis.exclude": [
"."
],
"python.linting.enabled": false
}

View File

@ -102,8 +102,9 @@ def main():
try:
args.func(args)
except AttributeError as e:
logger.debug(e)
logger.error("No subcommand specified")
argparser.print_help()
exit(1)
def upload_to_cloudflare(args):

View File

@ -1,5 +1,5 @@
import pathlib
import re
import requests
@ -53,23 +53,34 @@ class Config:
# Convert a hosts file to a simple hostname list
def convert_to_list(file: pathlib.Path) -> list:
    """Convert a hosts file into a flat list of blocked hostnames.

    Only entries whose address field is ``127.0.0.1``, ``0.0.0.0`` or ``::1``
    are considered. Everything from the first ``#`` on a line is treated as
    a comment and discarded. A single line may list several hostnames after
    the address; each one is returned as its own list element. Well-known
    loopback/broadcast aliases (``localhost``, ``ip6-allnodes``, ...) are
    skipped so they never end up in the resulting block list.

    Args:
        file: Path of the hosts file to read.

    Returns:
        Hostnames in file order (duplicates are preserved).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Addresses that blocklist-style hosts files point unwanted domains at.
    sink_addresses = frozenset({"127.0.0.1", "0.0.0.0", "::1"})
    # Names that describe the local machine rather than a domain to block.
    loopback = frozenset(
        {
            "localhost",
            "::1",
            "localhost.localdomain",
            "broadcasthost",
            "local",
            "ip6-localhost",
            "ip6-loopback",
            "ip6-localnet",
            "ip6-mcastprefix",
            "ip6-allnodes",
            "ip6-allrouters",
            "ip6-allhosts",
            "0.0.0.0",
        }
    )

    hosts = []
    # Hostnames are ASCII; undecodable bytes can only appear in comments, so
    # replace them instead of aborting the whole conversion.
    with open(file, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            # Drop the comment part, then split on any run of whitespace so
            # leading/trailing blanks and tabs never leak into a hostname.
            entry, _, _ = line.partition("#")
            fields = entry.split()
            if len(fields) < 2 or fields[0] not in sink_addresses:
                continue
            hosts.extend(name for name in fields[1:] if name not in loopback)

    print(f"First 5 hosts: {hosts[:5]}")
    return hosts