moved to async

This commit is contained in:
alexander
2026-01-23 09:21:42 +01:00
parent efc2116de4
commit e93e743f65
11 changed files with 106 additions and 76 deletions

4
.gitignore vendored
View File

@@ -5,4 +5,6 @@ node_modules
__pycache__/ __pycache__/
package-lock.json package-lock.json
package.json package.json
cheatsheet_inventory.json

View File

@@ -1,10 +1,12 @@
FROM python:3.12-slim FROM python:3.12-alpine
WORKDIR /app WORKDIR /app
RUN apk add --no-cache curl git libsass
COPY requirements.txt . COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
COPY . . COPY . .
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] CMD ["python", "src/main.py"]

View File

@@ -1,6 +0,0 @@
services:
server_html:
build: .
ports:
- "8000:8000"

View File

@@ -1,22 +1,32 @@
annotated-types==0.7.0 annotated-types==0.7.0
anyio==4.12.1
blinker==1.9.0 blinker==1.9.0
certifi==2026.1.4 certifi==2026.1.4
charset-normalizer==3.4.4 charset-normalizer==3.4.4
click==8.3.1 click==8.3.1
fastapi==0.115.0
Flask==3.1.2 Flask==3.1.2
h11==0.16.0
httpcore==1.0.9
httptools==0.7.1
httpx==0.28.1
idna==3.11 idna==3.11
itsdangerous==2.2.0 itsdangerous==2.2.0
Jinja2==3.1.6 Jinja2==3.1.6
libsass==0.23.0
livereload==2.7.1 livereload==2.7.1
MarkupSafe==3.0.3 MarkupSafe==3.0.3
pydantic==2.12.5 pydantic==2.12.5
pydantic_core==2.41.5 pydantic_core==2.41.5
python-dotenv==1.2.1
PyYAML==6.0.3
requests==2.32.5 requests==2.32.5
starlette==0.38.6
tornado==6.5.4 tornado==6.5.4
typing-inspection==0.4.2 typing-inspection==0.4.2
typing_extensions==4.15.0 typing_extensions==4.15.0
urllib3==2.6.3 urllib3==2.6.3
uvicorn==0.30.1
uvloop==0.22.1
watchfiles==1.1.1
websockets==16.0
Werkzeug==3.1.5 Werkzeug==3.1.5
fastapi==0.115.0
uvicorn[standard]==0.30.1

View File

@@ -51,6 +51,7 @@ async def build(trigger_list: list[str] | None = None):
# Copy to prod # Copy to prod
print("Copying to prod directory...") print("Copying to prod directory...")
shutil.copytree(OUTPUT_DIR, PROD_DIR, dirs_exist_ok=True) shutil.copytree(OUTPUT_DIR, PROD_DIR, dirs_exist_ok=True)
print("Done.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -38,6 +38,7 @@ async def prepare_cheatsheets(config: CSInventoryConfig, outdir: str) -> list[CS
print("Unknow Source Type:", item.source.type) print("Unknow Source Type:", item.source.type)
except: except:
traceback.print_exc() traceback.print_exc()
print("Error processing item:", item)
new_item = None new_item = None
if new_items: if new_items:

View File

@@ -55,5 +55,6 @@ async def trigger_all():
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn
uvicorn.run("main:app", host="0.0.0.0", port=8000) uvicorn.run(app, host="0.0.0.0", port=8000, log_level="debug")

View File

@@ -1,23 +1,27 @@
from sources import CSSourceGitea, CSItem, CSInventoryItem from sources import CSSourceGitea, CSItem, CSInventoryItem
from sources.util import cache_cheatsheet, get_datestring from sources.util import cache_cheatsheet, get_datestring
import requests import httpx
from pathlib import Path from pathlib import Path
async def process_gitea(item: CSInventoryItem, outdir: str) -> list[CSItem] | None: async def process_gitea(item: CSInventoryItem, outdir: str) -> list[CSItem] | None:
source: CSSourceGitea = item.source source: CSSourceGitea = item.source
commit_hash = get_release_commit_sha(source.base_url, source.owner, source.repo, source.tag) commit_hash = await get_release_commit_sha(source.base_url, source.owner, source.repo, source.tag)
asserts = list_release_assets(source.base_url, source.owner, source.repo, source.tag) asserts = await list_release_assets(source.base_url, source.owner, source.repo, source.tag)
asserts = filter(lambda a: a[1].endswith(".pdf"), asserts) asserts = list(filter(lambda a: a[1].endswith(".pdf"), asserts))
asserts = list(map(lambda a: (a[0], f"{source.base_url}/repos/{source.owner}/{source.repo}/releases/download/{source.tag}/{a[0]}"), asserts))
print(f"Found {len(asserts)} PDF assets in Gitea release {source.owner}/{source.repo}@{source.tag}")
res = [] res = []
for a in asserts: for a in asserts:
res_url = a[0] res_url = a[0]
if item.cache: if item.cache:
cache_url = cache_cheatsheet(a[0], outdir) cache_url = await cache_cheatsheet(a[0], outdir)
if cache_url: if cache_url:
res_url = cache_url res_url = cache_url
else: else:
@@ -38,7 +42,7 @@ async def process_gitea(item: CSInventoryItem, outdir: str) -> list[CSItem] | No
return res return res
def get_release_commit_sha(base_url, owner, repo, tag_name, token=None): async def get_release_commit_sha(base_url, owner, repo, tag_name, token=None):
""" """
Resolve the commit SHA for a Gitea release tag. Resolve the commit SHA for a Gitea release tag.
@@ -50,49 +54,48 @@ def get_release_commit_sha(base_url, owner, repo, tag_name, token=None):
:return: commit SHA (str) :return: commit SHA (str)
""" """
headers = {}
if token:
headers["Authorization"] = f"token {token}"
session = requests.Session() async with httpx.AsyncClient() as client:
session.headers.update(headers) headers = {}
if token:
headers["Authorization"] = f"token {token}"
# 1) List tags and find the matching tag # 1) List tags and find the matching tag
tags_url = f"{base_url}/api/v1/repos/{owner}/{repo}/tags" tags_url = f"{base_url}/api/v1/repos/{owner}/{repo}/tags"
resp = session.get(tags_url) resp = await client.get(tags_url, headers=headers)
resp.raise_for_status() resp.raise_for_status()
tags = resp.json() tags = resp.json()
tag = next((t for t in tags if t["name"] == tag_name), None) tag = next((t for t in tags if t["name"] == tag_name), None)
if not tag: if not tag:
raise ValueError(f"Tag '{tag_name}' not found") raise ValueError(f"Tag '{tag_name}' not found")
# Lightweight tags usually already contain the commit SHA # Lightweight tags usually already contain the commit SHA
commit_sha = tag.get("commit", {}).get("sha") commit_sha = tag.get("commit", {}).get("sha")
tag_obj_sha = tag.get("id") tag_obj_sha = tag.get("id")
# If commit.sha looks valid, return it # If commit.sha looks valid, return it
if commit_sha: if commit_sha:
return commit_sha return commit_sha
# 2) Annotated tag: dereference via /git/tags/{sha} # 2) Annotated tag: dereference via /git/tags/{sha}
if not tag_obj_sha: if not tag_obj_sha:
raise RuntimeError("Tag object SHA missing; cannot dereference annotated tag") raise RuntimeError("Tag object SHA missing; cannot dereference annotated tag")
git_tag_url = f"{base_url}/api/v1/repos/{owner}/{repo}/git/tags/{tag_obj_sha}" git_tag_url = f"{base_url}/api/v1/repos/{owner}/{repo}/git/tags/{tag_obj_sha}"
resp = session.get(git_tag_url) resp = await client.get(git_tag_url, headers=headers)
resp.raise_for_status() resp.raise_for_status()
annotated = resp.json() annotated = resp.json()
# The object pointed to by the tag (usually a commit) # The object pointed to by the tag (usually a commit)
target = annotated.get("object", {}) target = annotated.get("object", {})
if target.get("type") != "commit": if target.get("type") != "commit":
raise RuntimeError(f"Tag points to a {target.get('type')} instead of a commit") raise RuntimeError(f"Tag points to a {target.get('type')} instead of a commit")
return target.get("sha") return target.get("sha")
def list_release_assets(base_url, owner, repo, tag, token=None): async def list_release_assets(base_url, owner, repo, tag, token=None):
""" """
Return a list of (download_url, filename) for all assets of a Gitea release. Return a list of (download_url, filename) for all assets of a Gitea release.
@@ -103,26 +106,28 @@ def list_release_assets(base_url, owner, repo, tag, token=None):
:param token: optional API token :param token: optional API token
:returns: list of (download_url, filename) tuples :returns: list of (download_url, filename) tuples
""" """
headers = {}
if token:
headers["Authorization"] = f"token {token}"
# 1) Get release by tag async with httpx.AsyncClient() as client:
rel_url = f"{base_url}/api/v1/repos/{owner}/{repo}/releases/tags/{tag}" headers = {}
rel_resp = requests.get(rel_url, headers=headers) if token:
rel_resp.raise_for_status() headers["Authorization"] = f"token {token}"
release = rel_resp.json()
assets = release.get("assets", []) # 1) Get release by tag
result = [] rel_url = f"{base_url}/api/v1/repos/{owner}/{repo}/releases/tags/{tag}"
rel_resp = await client.get(rel_url, headers=headers)
rel_resp.raise_for_status()
release: dict = rel_resp.json()
for asset in assets: assets = release.get("assets", [])
# Gitea asset info usually contains: result = []
# - "browser_download_url" → direct URL
# - "name" → filename for asset in assets:
download_url = asset.get("browser_download_url") # Gitea asset info usually contains:
filename = asset.get("name") # - "browser_download_url" → direct URL
if download_url and filename: # - "name" → filename
result.append((download_url, filename)) download_url = asset.get("browser_download_url")
filename = asset.get("name")
if download_url and filename:
result.append((download_url, filename))
return result return result

View File

@@ -6,7 +6,7 @@ async def process_plain_url(item: CSInventoryItem, outdir: str) -> CSItem | None
res_url = source.url res_url = source.url
if item.cache: if item.cache:
cache_url = cache_cheatsheet(source.url, outdir) cache_url = await cache_cheatsheet(source.url, outdir)
if cache_url: if cache_url:
res_url = cache_url res_url = cache_url
else: else:

View File

@@ -1,22 +1,32 @@
import hashlib import hashlib
import requests import httpx
import datetime import datetime
import os import os
from pathlib import Path
from urllib.parse import urlparse
def get_datestring() -> str: def get_datestring() -> str:
return datetime.datetime.now().strftime("%d.%m.%y") return datetime.datetime.now().strftime("%d.%m.%y")
def cache_cheatsheet(url, outdir: str) -> str | None: async def cache_cheatsheet(url, outdir: str) -> str | None:
r = requests.get(url)
if not r.ok and r.headers.get("Content-Type") != "application/pdf": print("Caching cheatsheet from", url)
return None
try:
async with httpx.AsyncClient() as client:
r = await client.get(url, timeout=5.0)
if not r.is_success and r.headers.get("Content-Type") != "application/pdf":
return None
except httpx.TimeoutException:
print("Timeout fetching URL:", url)
return None
data = r.content data = r.content
hashdata = hashlib.sha256(data) url_base_name = Path(urlparse(url).path).stem
filesname = os.path.join("cache", f"{hashdata.hexdigest()}.pdf") filesname = os.path.join("cache", f"{url_base_name}.pdf")
if not os.path.exists(os.path.join(outdir, "cache")): if not os.path.exists(os.path.join(outdir, "cache")):
os.mkdir(os.path.join(outdir, "cache")) os.mkdir(os.path.join(outdir, "cache"))

4
static/robots.txt Normal file
View File

@@ -0,0 +1,4 @@
User-agent: *
Disallow: /
Disallow: /impressum
Disallow: /cgi-bin/