mirror of
https://github.com/hoshikawa2/openclaw-oci-pptx.git
synced 2026-03-06 10:11:02 +00:00
First commit
This commit is contained in:
84
project/read_url_and_read_file.sh
Normal file
84
project/read_url_and_read_file.sh
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Where to install scripts (default matches typical OpenClaw workspace folder)
|
||||
TARGET_DIR="${OPENCLAW_WORKDIR:-$HOME/.openclaw/workspace/openclaw_folder}"
|
||||
mkdir -p "$TARGET_DIR"
|
||||
|
||||
cat > "$TARGET_DIR/read_url" << 'EOF'
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def normalize_github_blob(url: str) -> str:
|
||||
# Convert github.com/.../blob/... to raw.githubusercontent.com/.../.../...
|
||||
if "github.com/" in url and "/blob/" in url:
|
||||
parts = url.split("github.com/", 1)[1].split("/blob/", 1)
|
||||
repo = parts[0].strip("/")
|
||||
rest = parts[1].lstrip("/")
|
||||
return f"https://raw.githubusercontent.com/{repo}/{rest}"
|
||||
return url
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: read_url <url>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
url = normalize_github_blob(sys.argv[1])
|
||||
|
||||
try:
|
||||
r = requests.get(url, timeout=30)
|
||||
r.raise_for_status()
|
||||
content = r.text
|
||||
|
||||
# If HTML, extract visible text
|
||||
if "<html" in content.lower() or "<body" in content.lower():
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
content = soup.get_text("\n")
|
||||
|
||||
print(content)
|
||||
|
||||
except Exception as e:
|
||||
print(f"ERROR: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
EOF
|
||||
|
||||
cat > "$TARGET_DIR/read_file" << 'EOF'
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def read_pdf(path: Path) -> str:
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
except Exception:
|
||||
raise RuntimeError("PyMuPDF (fitz) not installed. Install with: pip install pymupdf")
|
||||
doc = fitz.open(str(path))
|
||||
out = []
|
||||
for i in range(doc.page_count):
|
||||
out.append(doc.load_page(i).get_text("text"))
|
||||
return "\n".join(out)
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: read_file <path>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
p = Path(sys.argv[1]).expanduser()
|
||||
if not p.exists():
|
||||
print(f"ERROR: file not found: {p}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
suffix = p.suffix.lower()
|
||||
try:
|
||||
if suffix == ".pdf":
|
||||
print(read_pdf(p))
|
||||
else:
|
||||
print(p.read_text(encoding="utf-8", errors="replace"))
|
||||
except Exception as e:
|
||||
print(f"ERROR: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
EOF
|
||||
|
||||
chmod +x "$TARGET_DIR/read_url" "$TARGET_DIR/read_file"
|
||||
|
||||
echo "✅ Installed: $TARGET_DIR/read_url and $TARGET_DIR/read_file"
|
||||
Reference in New Issue
Block a user