Hilfe:LabelScan-Indexer: Unterschied zwischen den Versionen

Keine Bearbeitungszusammenfassung
Keine Bearbeitungszusammenfassung
 
Zeile 2: Zeile 2:
<div class="box" style="max-width:820px;margin:1rem auto;padding:1rem;border:1px solid #e5e7eb;border-radius:12px;">
<div class="box" style="max-width:820px;margin:1rem auto;padding:1rem;border:1px solid #e5e7eb;border-radius:12px;">
   <h2>📦 LabelScan – Indexer (Auto-Save)</h2>
   <h2>📦 LabelScan – Indexer (Auto-Save)</h2>
  <p>Erzeugt Embeddings lokal im Browser (CLIP) und schreibt sie automatisch nach <code>MediaWiki:Gadget-LabelScan-index.json</code>.</p>


   <label><b>Artikel-Titel</b> (genau wie im Wiki):
   <label><b>Artikel-Titel</b><br>
     <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px">
     <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.3rem 0;border:1px solid #ddd;border-radius:8px">
   </label>
   </label>


   <label><b>Thumb-URL</b> (optional, 120–300px breit):
   <label><b>Thumb-URL</b> (optional)<br>
     <input id="idx-thumb" type="url" placeholder="https://ados-wiki.de/images/.../thumb.jpg"
     <input id="idx-thumb" type="url" style="width:100%;padding:.5rem;margin:.3rem 0;border:1px solid #ddd;border-radius:8px">
          style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px">
   </label>
   </label>


   <label><b>Bilddatei</b> (Frontlabel-Foto):
   <label><b>Bilddatei</b><br>
     <input id="idx-file" type="file" accept="image/*" style="display:block;margin:.25rem 0 .75rem;">
     <input id="idx-file" type="file" accept="image/*" style="display:block;margin:.3rem 0 1rem;">
   </label>
   </label>


   <button id="idx-run" style="padding:.6rem .9rem;border-radius:10px;background:#2a4b8d;color:#fff;border:none;cursor:pointer">Embedding erzeugen &amp; speichern</button>
   <button id="idx-run" style="padding:.6rem .9rem;border-radius:10px;background:#2a4b8d;color:#fff;border:none;cursor:pointer">
    Embedding erzeugen &amp; speichern
  </button>
   <span id="idx-status" style="margin-left:.75rem;color:#555;"></span>
   <span id="idx-status" style="margin-left:.75rem;color:#555;"></span>


Zeile 23: Zeile 23:


   <h3>Zuletzt erzeugter JSON-Eintrag</h3>
   <h3>Zuletzt erzeugter JSON-Eintrag</h3>
   <textarea id="idx-out" rows="5" style="width:100%;font-family:ui-monospace,Consolas,monospace;padding:.6rem;border:1px solid #ddd;border-radius:10px"></textarea>
   <textarea id="idx-out" rows="5" style="width:100%;font-family:monospace;padding:.6rem;border:1px solid #ddd;border-radius:10px"></textarea>
</div>
</div>
<script>
/* global mw */
(function(){
  const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';
  // ---- Modell-Config (wie in deinem Gadget) ----
  const transformersURL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
  const MODEL_ID = 'Xenova/clip-vit-base-patch32';
  const LOCAL_MODEL_PATH = '/models';
  // ---- UI helpers ----
  const $ = id => document.getElementById(id);
  const status = (t) => { const el=$('idx-status'); if(el) el.textContent=t||''; };
  // Rechtecheck
  function hasSysop(){
    const groups = mw.config.get('wgUserGroups') || [];
    return groups.includes('sysop') || groups.includes('interface-admin');
  }
  // Float32 → base64
  function float32ToBase64(vec){
    const bytes = new Uint8Array(vec.buffer);
    let bin = '', chunk = 0x8000;
    for (let i=0; i<bytes.length; i+=chunk) {
      bin += String.fromCharCode.apply(null, bytes.subarray(i, i+chunk));
    }
    return btoa(bin);
  }
  // Optional: EXIF-korrekte Canvas-Erzeugung (Fallback ohne OffscreenCanvas)
  async function fileToCanvasExif(file){
    if ('createImageBitmap' in window) {
      const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
      // OffscreenCanvas bevorzugen, fallback auf <canvas>
      if ('OffscreenCanvas' in window) {
        const c = new OffscreenCanvas(bmp.width, bmp.height);
        c.getContext('2d').drawImage(bmp, 0, 0);
        return c;
      } else {
        const c = document.createElement('canvas');
        c.width = bmp.width; c.height = bmp.height;
        c.getContext('2d').drawImage(bmp, 0, 0);
        return c;
      }
    } else {
      // klassischer Weg
      const url = URL.createObjectURL(file);
      try {
        const img = await new Promise((res, rej)=>{
          const im = new Image();
          im.onload = ()=>res(im);
          im.onerror = rej;
          im.src = url;
        });
        const c = document.createElement('canvas');
        c.width = img.width; c.height = img.height;
        c.getContext('2d').drawImage(img, 0, 0);
        return c;
      } finally {
        URL.revokeObjectURL(url);
      }
    }
  }
  // ---- Transformers laden (einmalig) ----
  let _load;
  async function ensureModel(){
    if (_load) return _load;
    _load = (async()=>{
      const mod = await import(/* webpackIgnore: true */ transformersURL);
      // Nur lokale Modelle (wie beim Gadget)
      mod.env.allowLocalModels = true;
      mod.env.allowRemoteModels = false;
      mod.env.localModelPath = LOCAL_MODEL_PATH;
      // (Optional) WebGPU bevorzugen – fallback bleibt wasm
      // mod.env.backends = mod.env.backends || {};
      // mod.env.backends.onnx = mod.env.backends.onnx || {};
      // mod.env.backends.onnx.preferredBackend = 'webgpu';
      // WASM-Runtime-Pfad (ort-wasm-simd.wasm)
      mod.env.backends = mod.env.backends || {};
      mod.env.backends.onnx = mod.env.backends.onnx || {};
      mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
      mod.env.backends.onnx.wasm.wasmPaths =
        'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/';
      const [processor, model] = await Promise.all([
        mod.AutoProcessor.from_pretrained(MODEL_ID),
        mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true })
      ]);
      return { mod, processor, model };
    })();
    return _load;
  }
  async function buildEmbeddingFromFile(file){
    const { mod, processor, model } = await ensureModel();
    // Canvas (EXIF-korrigiert)
    const canvas = await fileToCanvasExif(file);
    // Canvas → Blob → RawImage (robust für Processor)
    const blob = (canvas.convertToBlob)
      ? await canvas.convertToBlob({ type:'image/jpeg', quality:0.95 })
      : await new Promise(r => canvas.toBlob(r, 'image/jpeg', 0.95));
    const imageRaw = await mod.RawImage.fromBlob(blob);
    const inputs = await processor(imageRaw, { return_tensors: 'pt' });
    const out = await model.forward({ pixel_values: inputs.pixel_values });
    const vec = out?.image_embeds?.data || out?.image_embeds;
    if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet');
    // Normieren
    let n=0; for(let i=0;i<vec.length;i++) n+=vec[i]*vec[i];
    const norm = Math.sqrt(n)||1;
    const v = new Float32Array(vec.length);
    for(let i=0;i<vec.length;i++) v[i]=vec[i]/norm;
    return v;
  }
  // ---- Index laden & speichern ----
  async function fetchIndexJSON(){
    const url = mw.util.getUrl(INDEX_TITLE, { action:'raw', ctype:'application/json' });
    const res = await fetch(url, { cache: 'no-store' });
    if (!res.ok) throw new Error('Index nicht ladbar: '+res.status);
    const txt = await res.text();
    // robust gegen leere/kaputte Inhalte
    let arr;
    try { arr = JSON.parse(txt || '[]'); }
    catch(_){ arr = []; }
    if (!Array.isArray(arr)) arr = [];
    return arr;
  }
  async function saveIndexJSON(newArray, summary){
    await mw.loader.using(['mediawiki.api']);
    const api = new mw.Api();
    // Hole aktuelle Seite, um Timestamp für Konflikt-Schutz zu haben
    const meta = await api.get({
      action: 'query',
      prop: 'revisions',
      titles: INDEX_TITLE,
      rvprop: 'timestamp|content',
      format: 'json'
    });
    const pages = meta?.query?.pages || {};
    const page = pages[Object.keys(pages)[0]];
    const baseTimestamp = page?.revisions?.[0]?.timestamp;
    const text = JSON.stringify(newArray, null, 2) + '\n';
    try {
      const res = await api.postWithToken('csrf', {
        action: 'edit',
        title: INDEX_TITLE,
        text,
        summary: summary || 'LabelScan: +1 embedding (Auto-Indexer)',
        nocreate: 0,
        bot: 1,
        basetimestamp: baseTimestamp
      });
      return res;
    } catch (e) {
      // einfacher Retry bei Konflikt: neu holen und erneut schreiben
      if ((e?.details||'').includes('editconflict')) {
        const fresh = await fetchIndexJSON();
        const merged = mergeArraysUnique(fresh, newArray); // simple Merge, Dedupe
        const text2 = JSON.stringify(merged, null, 2) + '\n';
        return api.postWithToken('csrf', {
          action: 'edit',
          title: INDEX_TITLE,
          text: text2,
          summary: (summary || 'LabelScan: +1 embedding (Auto-Indexer)') + ' (merge)',
          nocreate: 0,
          bot: 1
        });
      }
      throw e;
    }
  }
  // einfache Duplikat-Entfernung (identische title+embed)
  function mergeArraysUnique(base, add){
    const seen = new Set(base.map(x => (x.title||'')+'|'+(x.embed||'')));
    for (const it of add) {
      const key = (it.title||'')+'|'+(it.embed||'');
      if (!seen.has(key)) { base.push(it); seen.add(key); }
    }
    return base;
  }
  // ---- Klick-Handler ----
  $('idx-run').addEventListener('click', async ()=>{
    try{
      if (!hasSysop()) {
        alert('Du brauchst Admin-Rechte (sysop/interface-admin), um den Index automatisch zu speichern.');
        return;
      }
      const title = $('idx-title').value.trim();
      const thumb = $('idx-thumb').value.trim();
      const file  = $('idx-file').files?.[0];
      if (!title) return alert('Bitte Artikel-Titel eingeben.');
      if (!file)  return alert('Bitte Bilddatei wählen.');
      status('Modell laden …');
      await ensureModel();
      status('Embedding berechnen …');
      const vec = await buildEmbeddingFromFile(file);
      const b64 = float32ToBase64(vec);
      // Vorschau
      $('idx-preview').innerHTML = '';
      const u = URL.createObjectURL(file);
      const img = document.createElement('img');
      img.src = u; img.style.maxWidth='280px'; img.style.borderRadius='10px';
      $('idx-preview').appendChild(img);
      const newRow = { title, thumb: thumb || '', embed: b64 };
      $('idx-out').value = JSON.stringify(newRow);
      status('Index laden …');
      const arr = await fetchIndexJSON();
      const merged = mergeArraysUnique(arr, [newRow]);
      status('Speichern …');
      await saveIndexJSON(merged, `LabelScan: +1 embedding für "${title}"`);
      status('Gespeichert ✅');
    } catch(e){
      console.error(e);
      status('Fehler: ' + (e?.message || e));
      alert('Fehler beim Speichern:\n' + (e?.message || e));
    }
  });
})();
</script>
}}
}}