Thursday, July 2, 2026

Utilizing Carry to Flip Analysis PDFs into Structured JSON with Managed, Schema-Guided Subject-Degree Analysis


def render_pdf(d, path):
    """Render a realistic 3-page report PDF for one document record.

    Page breaks are forced so the headline metric on page 1 (summary) is
    physically separated from the results table on page 3.

    Args:
        d: Mapping describing the report. Keys read here: ``title``,
            ``authors`` (iterable of 2-item pairs rendered as ``"n (a)"`` --
            presumably name and affiliation; confirm against the caller),
            ``method``, ``task``, ``primary_benchmark``, ``metric_name``,
            ``test_acc``, ``val_acc``, ``prior_best``, ``beats_sota``,
            ``params_m``, ``datasets`` (iterable of str), ``optimizer``,
            ``lr``, ``batch``, ``epochs``, ``baseline_name``,
            ``baseline_val``, ``baseline_test``, ``limitations`` (iterable
            of str) and ``funding_note``.
        path: Output location handed to ``SimpleDocTemplate``.

    Returns:
        None. The PDF is written to ``path`` as a side effect.
    """
    # reportlab is imported lazily so the module can be loaded without it.
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.platypus import (SimpleDocTemplate, Paragraph, Spacer,
                                    Table, TableStyle, PageBreak)

    ss = getSampleStyleSheet()
    H1 = ParagraphStyle("H1", parent=ss["Title"], fontSize=16, leading=20, spaceAfter=6)
    AUTH = ParagraphStyle("AUTH", parent=ss["Normal"], fontSize=9.5,
                          textColor=colors.grey, spaceAfter=10)
    H2 = ParagraphStyle("H2", parent=ss["Heading2"], fontSize=12, spaceBefore=8, spaceAfter=4)
    BODY = ParagraphStyle("BODY", parent=ss["Normal"], fontSize=10, leading=14, spaceAfter=6)

    def table_style(header_hex, stripe_hex, *extra):
        """Build the shared table look: coloured header row, grid, zebra body rows."""
        return TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor(header_hex)),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTSIZE", (0, 0), (-1, -1), 9.5),
            ("GRID", (0, 0), (-1, -1), 0.4, colors.grey),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor(stripe_hex)]),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 4),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
            *extra,
        ])

    if d["beats_sota"]:
        sota_phrase = f"surpassing the previous best of {d['prior_best']}"
    else:
        sota_phrase = f"approaching but not exceeding the previous best of {d['prior_best']}"
    authors_line = ", ".join(f"{n} ({a})" for (n, a) in d["authors"])
    datasets_csv = ", ".join(d["datasets"])

    # ---- Page 1: title, authors, summary (carries the headline metric), keywords.
    story = [
        Paragraph(d["title"], H1),
        Paragraph(authors_line, AUTH),
        Paragraph("Summary", H2),
        Paragraph(
            f"We introduce {d['method']}, a model for {d['task']}. On the {d['primary_benchmark']} "
            f"benchmark, {d['method']} attains {d['test_acc']} {d['metric_name']} on the held-out "
            f"test set, {sota_phrase}. Our {d['params_m']}M-parameter model is evaluated across "
            f"{len(d['datasets'])} datasets ({datasets_csv}). "
            f"Extensive ablations confirm the contribution of each component.", BODY),
        Paragraph("Keywords", H2),
        Paragraph(f"{d['task']}; representation learning; {d['primary_benchmark']}", BODY),
        PageBreak(),
    ]

    # ---- Page 2: method/training details, hyperparameter table, datasets.
    hp = [["Hyperparameter", "Value"],
          ["Optimizer", d["optimizer"]],
          ["Learning rate", str(d["lr"])],
          ["Batch size", str(d["batch"])],
          ["Epochs", str(d["epochs"])],
          ["Parameters", f"{d['params_m']}M"]]
    t1 = Table(hp, colWidths=[2.4 * inch, 2.0 * inch])
    t1.setStyle(table_style("#2b3a67", "#eef1f8"))
    story += [
        Paragraph("1  Method and Training Details", H2),
        Paragraph(
            f"{d['method']} is trained end-to-end with the {d['optimizer']} optimizer. "
            f"We tune on a validation split and report final numbers on the test split. "
            f"The complete training configuration is summarized in Table 1.", BODY),
        Spacer(1, 4), t1, Spacer(1, 6),
        Paragraph("Table 1. Training configuration.", BODY),
        Paragraph("2  Datasets", H2),
        Paragraph(
            f"We evaluate on {datasets_csv}. {d['primary_benchmark']} is our "
            f"primary benchmark; the remaining datasets are used for generalization "
            f"studies.", BODY),
        PageBreak(),
    ]

    # ---- Page 3: results table, limitations, funding note.
    res = [["Method", f"Val. {d['metric_name']}", f"Test {d['metric_name']}"],
           [f"{d['baseline_name']} (baseline)", str(d["baseline_val"]), str(d["baseline_test"])],
           [f"{d['method']} (ours)", str(d["val_acc"]), str(d["test_acc"])]]
    t2 = Table(res, colWidths=[2.6 * inch, 1.7 * inch, 1.7 * inch])
    # Row index 2 is the "(ours)" row; it is set in bold to match the caption.
    t2.setStyle(table_style("#7a2e2e", "#f7eeee",
                            ("FONTNAME", (0, 2), (-1, 2), "Helvetica-Bold")))
    story += [
        Paragraph("3  Results", H2),
        Spacer(1, 4), t2, Spacer(1, 6),
        Paragraph(f"Table 2. Results on {d['primary_benchmark']}. "
                  f"Best test result in bold.", BODY),
        Paragraph("4  Limitations", H2),
    ]
    story.extend(Paragraph("• " + lim, BODY) for lim in d["limitations"])
    story += [Paragraph("5  Funding and Code Availability", H2),
              Paragraph(d["funding_note"], BODY)]

    SimpleDocTemplate(path, pagesize=LETTER,
                      topMargin=0.8 * inch, bottomMargin=0.8 * inch,
                      leftMargin=0.9 * inch, rightMargin=0.9 * inch).build(story)
# Render one PDF per record in DOCS and collect (record, ground truth, path)
# triples in CORPUS.
print("STEP 3/7 · Producing artificial report PDFs…")
CORPUS = []
# Write into /content when that directory exists (presumably a hosted-notebook
# working directory -- confirm); otherwise fall back to the current directory.
out_dir = "/content" if os.path.isdir("/content") else ""
for i, d in enumerate(DOCS):
    path = os.path.join(out_dir, f"report_{i}.pdf")
    render_pdf(d, path)
    CORPUS.append((d, ground_truth(d), path))
    print(f"     ✓ {os.path.basename(path)}  —  {d['method']}")
print()

if SHOW_FIRST_PAGE:
    # Best-effort preview of page 1 of the first PDF. The optional preview
    # dependencies, or CORPUS[0] itself, may be missing, so any failure is
    # reported and skipped rather than aborting the run.
    try:
        import pypdfium2 as pdfium
        import matplotlib.pyplot as plt

        pg = pdfium.PdfDocument(CORPUS[0][2])[0]
        img = pg.render(scale=2.0).to_pil()
        plt.figure(figsize=(6.4, 8.3))
        plt.imshow(img)
        plt.axis("off")
        plt.title("What carry reads — page 1 of report_0.pdf", fontsize=10)
        plt.show()
    except Exception as e:
        print("     (page preview skipped:", e, ")\n")

Related Articles

Latest Articles