From bc94e676354342994efa9791ccd1f04bf8d240e4 Mon Sep 17 00:00:00 2001 From: /techno <77907286+slashtechno@users.noreply.github.com> Date: Sun, 6 Aug 2023 20:29:07 +0000 Subject: [PATCH] Use regex for extracting hosts --- .env.example | 4 ++-- .gitignore | 1 + .vscode/settings.json | 10 ++++++++-- src/__main__.py | 3 ++- src/utils/utils.py | 45 +++++++++++++++++++++++++++---------------- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/.env.example b/.env.example index b8108a6..ea58e6d 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,2 @@ -export CLOUDFLARE_ACCOUNT_ID= -export CLOUDFLARE_TOKEN= \ No newline at end of file +CLOUDFLARE_ACCOUNT_ID= +CLOUDFLARE_TOKEN= \ No newline at end of file diff --git a/.gitignore b/.gitignore index 646d043..b0a5ff0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ tmp.py .venv dist/ .ruff_cache/ +hosts.txt \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 77e20df..4b99084 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,10 @@ { - "python.languageServer": "Pylance", - "python.analysis.ignore": [ "*" ] // Ruff is used for linting but Pylance still is useful + "python.languageServer": "Pylance", // Ruff is used for linting but Pylance still is useful for intellisense + "python.analysis.ignore": [ + "*" + ], + "python.analysis.exclude": [ + "." + ], + "python.linting.enabled": false } diff --git a/src/__main__.py b/src/__main__.py index c8af2e5..a236be8 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -102,8 +102,9 @@ def main(): try: args.func(args) except AttributeError as e: - logger.debug(e) + logger.error("No subcommand specified") argparser.print_help() + exit(1) def upload_to_cloudflare(args): diff --git a/src/utils/utils.py b/src/utils/utils.py index db9a187..745c24a 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -1,5 +1,5 @@ import pathlib - +import re import requests @@ -53,23 +53,34 @@ class Config: # Convert a hosts file to a simple hostname list def convert_to_list(file: pathlib.Path) -> list: with open(file, "r") as f: - # Don't read commented lines; strip whitespace; - # remove 127.0.0.1 from beginning of line; - # ignore lines with "localhost"; ignore lines with "::1"; - # ignore newlines - hosts = [ - i[10:].strip() - for i in f.readlines() - if not i.startswith("#") and "localhost" not in i and "::1" not in i + # Loop through the file and using regex, only get the domain names + # Remove the prefixed loopback domain and suffixed comments + # Remove any empty strings + loopback = [ + "localhost", + "::1", + "localhost.localdomain", + "broadcasthost", + "local", + "ip6-localhost", + "ip6-loopback", + "ip6-localnet", + "ip6-mcastprefix", + "ip6-allnodes", + "ip6-allrouters", + "ip6-allhosts", + "0.0.0.0", ] - # Equivalent to: - # for x in f.readlines(): - # if not x.startswith('#') and 'localhost' not in x and '::1' not in x: - # hosts.append(x[10:].strip()) - - # If there are any empty strings in the list, remove them - # For some reason, whitelist seems to still be present - hosts = [i for i in hosts if i != ""] + matches = [ + re.search(r"^(?:127\.0\.0\.1|0\.0\.0\.0|::1)\s+(.+?)(?:\s+#.+)?$", line) + for line in f + ] + hosts = [ + match.group(1) + for match in matches + if match and match.group(1) not in loopback + ] + print(f"First 5 hosts: {hosts[:5]}") return hosts