diff --git a/requirements.txt b/requirements.txt index 83c50e6..83eb6ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,12 +9,11 @@ Flask==3.1.2 h11==0.16.0 httpcore==1.0.9 httptools==0.7.1 -httpx==0.28.1 +httpx idna==3.11 itsdangerous==2.2.0 janus==2.0.0 Jinja2==3.1.6 -libsass==0.23.0 livereload==2.7.1 MarkupSafe==3.0.3 pydantic==2.12.5 @@ -22,7 +21,6 @@ pydantic-settings==2.12.0 pydantic_core==2.41.5 python-dotenv==1.2.1 PyYAML==6.0.3 -requests==2.32.5 starlette==0.38.6 tornado==6.5.4 typing-inspection==0.4.2 diff --git a/src/config.py b/src/config.py index 346dcbc..cc1b698 100644 --- a/src/config.py +++ b/src/config.py @@ -8,7 +8,7 @@ class PathsConfig(BaseSettings): inventory_file: str = Field(default="cheatsheet_inventory.json", description="Cheatsheet inventory file") templates: str = Field(default="templates", description="Templates directory") static: str = Field(default="static", description="Static files directory") - output: str = Field(default="prod", description="Output directory") + output: str = Field(default="out", description="Output directory") prod: str = Field(default="prod", description="Production directory") diff --git a/src/inventory.py b/src/inventory.py index 50b5de4..1497ede 100644 --- a/src/inventory.py +++ b/src/inventory.py @@ -7,6 +7,8 @@ from sources.gitea import process_gitea from logger import get_worker_thread_logger def load_cheatsheet_inventory(file: str) -> CSInventoryConfig: + logger = get_worker_thread_logger() + logger.info(f"Loading cheatsheet inventory from {file}") if not os.path.exists(file): res = CSInventoryConfig(items=[]) else: diff --git a/src/main.py b/src/main.py index 5675662..95674aa 100644 --- a/src/main.py +++ b/src/main.py @@ -39,7 +39,6 @@ def worker(): async def lifespan(app: FastAPI): global build_queue, build_queue_sync, build_queue_async settings = load_settings() - logger = getLogger("uvicorn").getChild("lifespan") build_queue = janus.Queue() diff --git a/src/sources/__init__.py b/src/sources/__init__.py index 85506a7..641c237 100644 --- a/src/sources/__init__.py +++ b/src/sources/__init__.py @@ -15,6 +15,7 @@ class CSSourceBase(BaseModel): class CSSourceGitea(CSSourceBase): type: Literal[CheatsheetSourceType.GITEA_SOURCE] base_url: str + fetch_url: str | None = Field(default=None) repo: str owner: str tag: str diff --git a/src/sources/gitea.py b/src/sources/gitea.py index 764108e..2ef397a 100644 --- a/src/sources/gitea.py +++ b/src/sources/gitea.py @@ -3,34 +3,41 @@ from sources import CSSourceGitea, CSItem, CSInventoryItem from sources.util import cache_cheatsheet, get_datestring import httpx from pathlib import Path +from logger import get_worker_thread_logger def process_gitea(item: CSInventoryItem, outdir: str) -> list[CSItem] | None: + logger = get_worker_thread_logger() + logger.info(f"Processing Gitea cheatsheet: {item.source.owner}/{item.source.repo}@{item.source.tag}") source: CSSourceGitea = item.source - commit_hash = get_release_commit_sha(source.base_url, source.owner, source.repo, source.tag) - asserts = list_release_assets(source.base_url, source.owner, source.repo, source.tag) + commit_hash = get_release_commit_sha(source.fetch_url or source.base_url, source.owner, source.repo, source.tag) + assets = list_release_assets(source.fetch_url or source.base_url, source.owner, source.repo, source.tag) - asserts = list(filter(lambda a: a[1].endswith(".pdf"), asserts)) - asserts = list(map(lambda a: (a[0], f"{source.base_url}/repos/{source.owner}/{source.repo}/releases/download/{source.tag}/{a[0]}"), asserts)) + assets = list(filter(lambda a: a[1].endswith(".pdf"), assets)) + print(assets) + assets_urls = list(map(lambda a: ( + f"{source.fetch_url or source.base_url}/{source.owner}/{source.repo}/releases/download/{source.tag}/{a[1]}", + f"{source.base_url}/{source.owner}/{source.repo}/releases/download/{source.tag}/{a[1]}" + ), assets), + ) - print(f"Found {len(asserts)} PDF assets in Gitea release {source.owner}/{source.repo}@{source.tag}") + logger.info(f"Found {len(assets_urls)} PDF assets in Gitea release {source.owner}/{source.repo}@{source.tag}") res = [] - for a in asserts: - res_url = a[0] + for fetch_url, real_url in assets_urls: if item.cache: - cache_url = cache_cheatsheet(a[0], outdir) + cache_url = cache_cheatsheet(fetch_url, outdir) if cache_url: - res_url = cache_url + real_url = cache_url else: continue - name = Path(a[1]).stem + name = Path(real_url).stem res.append(CSItem( - url = res_url, + url = real_url, date=get_datestring(), commit=commit_hash[:10] if commit_hash else "", author=item.author if item.author else source.owner, diff --git a/src/sources/util.py b/src/sources/util.py index 3ffdcdb..1748b32 100644 --- a/src/sources/util.py +++ b/src/sources/util.py @@ -2,6 +2,7 @@ import httpx import datetime import os from pathlib import Path +from logger import get_worker_thread_logger from urllib.parse import urlparse def get_datestring() -> str: @@ -9,16 +10,17 @@ def get_datestring() -> str: def cache_cheatsheet(url, outdir: str) -> str | None: - - print("Caching cheatsheet from", url) + logger = get_worker_thread_logger() + logger.info(f"Caching cheatsheet from {url}") try: with httpx.Client() as client: r = client.get(url, timeout=5.0) if not r.is_success and r.headers.get("Content-Type") != "application/pdf": + logger.error(f"Failed to fetch URL: {url} (status code: {r.status_code})") return None except httpx.TimeoutException: - print("Timeout fetching URL:", url) + logger.error(f"Timeout fetching URL: {url}") return None data = r.content @@ -33,6 +35,6 @@ def cache_cheatsheet(url, outdir: str) -> str | None: with open(os.path.join(outdir, filesname), "wb") as f: f.write(data) - print("Saved file to", filesname) + logger.info(f"Saved file to {filesname}") return filesname