Use regex for extracting hosts

This commit is contained in:
/techno 2023-08-06 20:29:07 +00:00 committed by slashtechno
parent df2a598c9e
commit bc94e67635
Signed by: slashtechno
GPG Key ID: 8EC1D9D9286C2B17
5 changed files with 41 additions and 22 deletions

View File

@@ -1,2 +1,2 @@
export CLOUDFLARE_ACCOUNT_ID= CLOUDFLARE_ACCOUNT_ID=
export CLOUDFLARE_TOKEN= CLOUDFLARE_TOKEN=

1
.gitignore vendored
View File

@@ -8,3 +8,4 @@ tmp.py
.venv .venv
dist/ dist/
.ruff_cache/ .ruff_cache/
hosts.txt

10
.vscode/settings.json vendored
View File

@@ -1,4 +1,10 @@
{ {
"python.languageServer": "Pylance", "python.languageServer": "Pylance", // Ruff is used for linting but Pylance still is useful for intellisense
"python.analysis.ignore": [ "*" ] // Ruff is used for linting but Pylance still is useful "python.analysis.ignore": [
"*"
],
"python.analysis.exclude": [
"."
],
"python.linting.enabled": false
} }

View File

@@ -102,8 +102,9 @@ def main():
try: try:
args.func(args) args.func(args)
except AttributeError as e: except AttributeError as e:
logger.debug(e) logger.error("No subcommand specified")
argparser.print_help() argparser.print_help()
exit(1)
def upload_to_cloudflare(args): def upload_to_cloudflare(args):

View File

@@ -1,5 +1,5 @@
import pathlib import pathlib
import re
import requests import requests
@@ -53,23 +53,34 @@ class Config:
# Convert a hosts file to a simple hostname list # Convert a hosts file to a simple hostname list
def convert_to_list(file: pathlib.Path) -> list: def convert_to_list(file: pathlib.Path) -> list:
with open(file, "r") as f: with open(file, "r") as f:
# Don't read commented lines; strip whitespace; # Loop through the file and using regex, only get the domain names
# remove 127.0.0.1 from beginning of line; # Remove the prefixed loopback domain and suffixed comments
# ignore lines with "localhost"; ignore lines with "::1"; # Remove any empty strings
# ignore newlines loopback = [
hosts = [ "localhost",
i[10:].strip() "::1",
for i in f.readlines() "localhost.localdomain",
if not i.startswith("#") and "localhost" not in i and "::1" not in i "broadcasthost",
"local",
"ip6-localhost",
"ip6-loopback",
"ip6-localnet",
"ip6-mcastprefix",
"ip6-allnodes",
"ip6-allrouters",
"ip6-allhosts",
"0.0.0.0",
] ]
# Equivalent to: matches = [
# for x in f.readlines(): re.search(r"^(?:127\.0\.0\.1|0\.0\.0\.0|::1)\s+(.+?)(?:\s+#.+)?$", line)
# if not x.startswith('#') and 'localhost' not in x and '::1' not in x: for line in f
# hosts.append(x[10:].strip()) ]
hosts = [
# If there are any empty strings in the list, remove them match.group(1)
# For some reason, whitelist seems to still be present for match in matches
hosts = [i for i in hosts if i != ""] if match and match.group(1) not in loopback
]
print(f"First 5 hosts: {hosts[:5]}")
return hosts return hosts