From e93e743f656f86dec9b6705fd311b9d071fd6dee Mon Sep 17 00:00:00 2001 From: alexander Date: Fri, 23 Jan 2026 09:21:42 +0100 Subject: [PATCH] moved to asyncd --- .gitignore | 4 +- Dockerfile | 6 ++- docker-compose.yml | 6 --- requirements.txt | 16 ++++-- src/build.py | 1 + src/inventory.py | 1 + src/main.py | 3 +- src/sources/gitea.py | 115 ++++++++++++++++++++++--------------------- src/sources/plain.py | 2 +- src/sources/util.py | 24 ++++++--- static/robots.txt | 4 ++ 11 files changed, 106 insertions(+), 76 deletions(-) delete mode 100644 docker-compose.yml create mode 100644 static/robots.txt diff --git a/.gitignore b/.gitignore index 6fc3840..a04cb52 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ node_modules __pycache__/ package-lock.json -package.json \ No newline at end of file +package.json + +cheatsheet_inventory.json \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 70b3546..dad84e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,12 @@ -FROM python:3.12-slim +FROM python:3.12-alpine WORKDIR /app +RUN apk add --no-cache curl git libsass + COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY . . -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file +CMD ["python", "src/main.py"] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 172cd75..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,6 +0,0 @@ - -services: - server_html: - build: . - ports: - - "8000:8000" diff --git a/requirements.txt b/requirements.txt index a3b9a15..bc1e80b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,32 @@ annotated-types==0.7.0 +anyio==4.12.1 blinker==1.9.0 certifi==2026.1.4 charset-normalizer==3.4.4 click==8.3.1 +fastapi==0.115.0 Flask==3.1.2 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.7.1 +httpx==0.28.1 idna==3.11 itsdangerous==2.2.0 Jinja2==3.1.6 -libsass==0.23.0 livereload==2.7.1 MarkupSafe==3.0.3 pydantic==2.12.5 pydantic_core==2.41.5 +python-dotenv==1.2.1 +PyYAML==6.0.3 requests==2.32.5 +starlette==0.38.6 tornado==6.5.4 typing-inspection==0.4.2 typing_extensions==4.15.0 urllib3==2.6.3 +uvicorn==0.30.1 +uvloop==0.22.1 +watchfiles==1.1.1 +websockets==16.0 Werkzeug==3.1.5 -fastapi==0.115.0 -uvicorn[standard]==0.30.1 diff --git a/src/build.py b/src/build.py index 3b3757d..76392d5 100644 --- a/src/build.py +++ b/src/build.py @@ -51,6 +51,7 @@ async def build(trigger_list: list[str] | None = None): # Copy to prod print("Copying to prod directory...") shutil.copytree(OUTPUT_DIR, PROD_DIR, dirs_exist_ok=True) + print("Done.") if __name__ == "__main__": diff --git a/src/inventory.py b/src/inventory.py index f1b9364..7419572 100644 --- a/src/inventory.py +++ b/src/inventory.py @@ -38,6 +38,7 @@ async def prepare_cheatsheets(config: CSInventoryConfig, outdir: str) -> list[CS print("Unknow Source Type:", item.source.type) except: traceback.print_exc() + print("Error processing item:", item) new_item = None if new_items: diff --git a/src/main.py b/src/main.py index 0f075a7..0e8ee83 100644 --- a/src/main.py +++ b/src/main.py @@ -55,5 +55,6 @@ async def trigger_all(): if __name__ == "__main__": import uvicorn - uvicorn.run("main:app", host="0.0.0.0", port=8000) + uvicorn.run(app, host="0.0.0.0", port=8000, log_level="debug") + \ No newline at end of file diff --git a/src/sources/gitea.py b/src/sources/gitea.py index 15df9c3..8a8f164 100644 --- a/src/sources/gitea.py +++ b/src/sources/gitea.py @@ -1,23 +1,27 @@ from sources import CSSourceGitea, CSItem, CSInventoryItem from sources.util import cache_cheatsheet, get_datestring -import requests +import httpx from pathlib import Path async def process_gitea(item: CSInventoryItem, outdir: str) -> list[CSItem] | None: source: CSSourceGitea = item.source - commit_hash = get_release_commit_sha(source.base_url, source.owner, source.repo, source.tag) - asserts = list_release_assets(source.base_url, source.owner, source.repo, source.tag) + commit_hash = await get_release_commit_sha(source.base_url, source.owner, source.repo, source.tag) + asserts = await list_release_assets(source.base_url, source.owner, source.repo, source.tag) - asserts = filter(lambda a: a[1].endswith(".pdf"), asserts) + asserts = list(filter(lambda a: a[1].endswith(".pdf"), asserts)) + asserts = list(map(lambda a: (a[0], f"{source.base_url}/repos/{source.owner}/{source.repo}/releases/download/{source.tag}/{a[0]}"), asserts)) + + print(f"Found {len(asserts)} PDF assets in Gitea release {source.owner}/{source.repo}@{source.tag}") res = [] for a in asserts: res_url = a[0] + if item.cache: - cache_url = cache_cheatsheet(a[0], outdir) + cache_url = await cache_cheatsheet(a[0], outdir) if cache_url: res_url = cache_url else: @@ -38,7 +42,7 @@ async def process_gitea(item: CSInventoryItem, outdir: str) -> list[CSItem] | No return res -def get_release_commit_sha(base_url, owner, repo, tag_name, token=None): +async def get_release_commit_sha(base_url, owner, repo, tag_name, token=None): """ Resolve the commit SHA for a Gitea release tag. @@ -50,49 +54,48 @@ def get_release_commit_sha(base_url, owner, repo, tag_name, token=None): :return: commit SHA (str) """ - headers = {} - if token: - headers["Authorization"] = f"token {token}" - session = requests.Session() - session.headers.update(headers) + async with httpx.AsyncClient() as client: + headers = {} + if token: + headers["Authorization"] = f"token {token}" - # 1) List tags and find the matching tag - tags_url = f"{base_url}/api/v1/repos/{owner}/{repo}/tags" - resp = session.get(tags_url) - resp.raise_for_status() - tags = resp.json() + # 1) List tags and find the matching tag + tags_url = f"{base_url}/api/v1/repos/{owner}/{repo}/tags" + resp = await client.get(tags_url, headers=headers) + resp.raise_for_status() + tags = resp.json() - tag = next((t for t in tags if t["name"] == tag_name), None) - if not tag: - raise ValueError(f"Tag '{tag_name}' not found") + tag = next((t for t in tags if t["name"] == tag_name), None) + if not tag: + raise ValueError(f"Tag '{tag_name}' not found") - # Lightweight tags usually already contain the commit SHA - commit_sha = tag.get("commit", {}).get("sha") - tag_obj_sha = tag.get("id") + # Lightweight tags usually already contain the commit SHA + commit_sha = tag.get("commit", {}).get("sha") + tag_obj_sha = tag.get("id") - # If commit.sha looks valid, return it - if commit_sha: - return commit_sha + # If commit.sha looks valid, return it + if commit_sha: + return commit_sha - # 2) Annotated tag: dereference via /git/tags/{sha} - if not tag_obj_sha: - raise RuntimeError("Tag object SHA missing; cannot dereference annotated tag") + # 2) Annotated tag: dereference via /git/tags/{sha} + if not tag_obj_sha: + raise RuntimeError("Tag object SHA missing; cannot dereference annotated tag") - git_tag_url = f"{base_url}/api/v1/repos/{owner}/{repo}/git/tags/{tag_obj_sha}" - resp = session.get(git_tag_url) - resp.raise_for_status() - annotated = resp.json() + git_tag_url = f"{base_url}/api/v1/repos/{owner}/{repo}/git/tags/{tag_obj_sha}" + resp = await client.get(git_tag_url, headers=headers) + resp.raise_for_status() + annotated = resp.json() - # The object pointed to by the tag (usually a commit) - target = annotated.get("object", {}) - if target.get("type") != "commit": - raise RuntimeError(f"Tag points to a {target.get('type')} instead of a commit") + # The object pointed to by the tag (usually a commit) + target = annotated.get("object", {}) + if target.get("type") != "commit": + raise RuntimeError(f"Tag points to a {target.get('type')} instead of a commit") return target.get("sha") -def list_release_assets(base_url, owner, repo, tag, token=None): +async def list_release_assets(base_url, owner, repo, tag, token=None): """ Return a list of (download_url, filename) for all assets of a Gitea release. @@ -103,26 +106,28 @@ def list_release_assets(base_url, owner, repo, tag, token=None): :param token: optional API token :returns: list of (download_url, filename) tuples """ - headers = {} - if token: - headers["Authorization"] = f"token {token}" - # 1) Get release by tag - rel_url = f"{base_url}/api/v1/repos/{owner}/{repo}/releases/tags/{tag}" - rel_resp = requests.get(rel_url, headers=headers) - rel_resp.raise_for_status() - release = rel_resp.json() + async with httpx.AsyncClient() as client: + headers = {} + if token: + headers["Authorization"] = f"token {token}" - assets = release.get("assets", []) - result = [] + # 1) Get release by tag + rel_url = f"{base_url}/api/v1/repos/{owner}/{repo}/releases/tags/{tag}" + rel_resp = await client.get(rel_url, headers=headers) + rel_resp.raise_for_status() + release: dict = rel_resp.json() - for asset in assets: - # Gitea asset info usually contains: - # - "browser_download_url" → direct URL - # - "name" → filename - download_url = asset.get("browser_download_url") - filename = asset.get("name") - if download_url and filename: - result.append((download_url, filename)) + assets = release.get("assets", []) + result = [] + + for asset in assets: + # Gitea asset info usually contains: + # - "browser_download_url" → direct URL + # - "name" → filename + download_url = asset.get("browser_download_url") + filename = asset.get("name") + if download_url and filename: + result.append((download_url, filename)) return result \ No newline at end of file diff --git a/src/sources/plain.py b/src/sources/plain.py index 0da025e..83e3a57 100644 --- a/src/sources/plain.py +++ b/src/sources/plain.py @@ -6,7 +6,7 @@ async def process_plain_url(item: CSInventoryItem, outdir: str) -> CSItem | None res_url = source.url if item.cache: - cache_url = cache_cheatsheet(source.url, outdir) + cache_url = await cache_cheatsheet(source.url, outdir) if cache_url: res_url = cache_url else: diff --git a/src/sources/util.py b/src/sources/util.py index d78d233..bc34ad8 100644 --- a/src/sources/util.py +++ b/src/sources/util.py @@ -1,22 +1,32 @@ import hashlib -import requests +import httpx import datetime import os +from pathlib import Path +from urllib.parse import urlparse def get_datestring() -> str: return datetime.datetime.now().strftime("%d.%m.%y") -def cache_cheatsheet(url, outdir: str) -> str | None: - r = requests.get(url) - if not r.ok and r.headers.get("Content-Type") != "application/pdf": - return None +async def cache_cheatsheet(url, outdir: str) -> str | None: + + print("Caching cheatsheet from", url) + + try: + async with httpx.AsyncClient() as client: + r = await client.get(url, timeout=5.0) + if not r.is_success and r.headers.get("Content-Type") != "application/pdf": + return None + except httpx.TimeoutException: + print("Timeout fetching URL:", url) + return None data = r.content - hashdata = hashlib.sha256(data) + url_base_name = Path(urlparse(url).path).stem - filesname = os.path.join("cache", f"{hashdata.hexdigest()}.pdf") + filesname = os.path.join("cache", f"{url_base_name}.pdf") if not os.path.exists(os.path.join(outdir, "cache")): os.mkdir(os.path.join(outdir, "cache")) diff --git a/static/robots.txt b/static/robots.txt new file mode 100644 index 0000000..894e799 --- /dev/null +++ b/static/robots.txt @@ -0,0 +1,4 @@ +User-agent: * +Disallow: / +Disallow: /impressum +Disallow: /cgi-bin/