Zum Inhalt springen

Hilfe:LabelScan-Indexer

Aus ADOS Wiki
Version vom 9. November 2025, 15:56 Uhr von Admin (Diskussion | Beiträge) (Die Seite wurde neu angelegt: „<div class="box" style="max-width:820px;margin:1rem auto;padding:1rem;border:1px solid #e5e7eb;border-radius:12px;"> <h2>📦 LabelScan – Indexer (Auto-Save)</h2> <p>Erzeugt Embeddings lokal im Browser (CLIP) und schreibt sie automatisch nach <code>MediaWiki:Gadget-LabelScan-index.json</code>.</p> <label><b>Artikel-Titel</b> (genau wie im Wiki): <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1p…“)
(Unterschied) ← Nächstältere Version | Aktuelle Version (Unterschied) | Nächstjüngere Version → (Unterschied)

📦 LabelScan – Indexer (Auto-Save)

Erzeugt Embeddings lokal im Browser (CLIP) und schreibt sie automatisch nach MediaWiki:Gadget-LabelScan-index.json.

 <label>Artikel-Titel (genau wie im Wiki):
   <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px">
 </label>
 <label>Thumb-URL (optional, 120–300px breit):
   <input id="idx-thumb" type="url" placeholder="thumb.jpg"
          style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px">
 </label>
 <label>Bilddatei (Frontlabel-Foto):
   <input id="idx-file" type="file" accept="image/*" style="display:block;margin:.25rem 0 .75rem;">
 </label>
 <button id="idx-run" style="padding:.6rem .9rem;border-radius:10px;background:#2a4b8d;color:#fff;border:none;cursor:pointer">Embedding erzeugen & speichern</button>
 <div id="idx-status"></div>
 <div id="idx-preview"></div>

Zuletzt erzeugter JSON-Eintrag

 <textarea id="idx-out" rows="5" style="width:100%;font-family:ui-monospace,Consolas,monospace;padding:.6rem;border:1px solid #ddd;border-radius:10px"></textarea>

<script> /* global mw */ (function(){

 // Wiki page the embedding index is read from and written to.
 const INDEX_TITLE = "MediaWiki:Gadget-LabelScan-index.json";

 // ---- Model configuration (kept identical to the gadget) ----
 // Version-pinned module URL passed to the dynamic import() in ensureModel.
 const transformersURL = "https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0";
 // Model identifier handed to the from_pretrained() loaders.
 const MODEL_ID = "Xenova/clip-vit-base-patch32";
 // Assigned to env.localModelPath in ensureModel.
 const LOCAL_MODEL_PATH = "/models";
 // ---- UI helpers ----
 /** Shorthand for document.getElementById. */
 const $ = (id) => document.getElementById(id);
 /**
  * Writes a progress or error message into the #idx-status element.
  * Does nothing when that element is not on the page.
  * @param {string} [t] - message to show; a falsy value clears the text.
  */
 const status = (t) => {
   const el = $('idx-status');
   // The '' fallback had been stripped from the source (wiki markup ate the
   // quote pair), which left a syntax error here.
   if (el) el.textContent = t || '';
 };
 // Permission check
 /**
  * Tells whether the current user is in one of the groups allowed to
  * auto-save the index ('sysop' or 'interface-admin').
  * Reads the group list from mw.config ('wgUserGroups'); a missing value
  * is treated as an empty list.
  * @returns {boolean}
  */
 function hasSysop(){
   const userGroups = mw.config.get('wgUserGroups') || [];
   const allowed = ['sysop', 'interface-admin'];
   return allowed.some((name) => userGroups.includes(name));
 }
 // Float32 → base64
 /**
  * Encodes the raw bytes of a typed array as a base64 string.
  * @param {Float32Array} vec - values to encode (byte order is the platform's).
  * @returns {string} base64 of exactly the bytes covered by `vec`.
  */
 function float32ToBase64(vec){
   // Honour byteOffset/byteLength: for a view into a larger buffer (e.g. a
   // subarray) `vec.buffer` alone would encode the whole backing store.
   const bytes = new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
   // Convert in slices so Function.prototype.apply never receives an
   // oversized argument list.
   const chunk = 0x8000;
   let bin = '';
   for (let i = 0; i < bytes.length; i += chunk) {
     bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));
   }
   return btoa(bin);
 }
 // Canvas creation that asks the decoder to apply the image's own orientation.
 /**
  * Decodes an image file and draws it onto a canvas at its full size.
  *
  * When createImageBitmap is available the file is decoded with
  * `imageOrientation: 'from-image'` and drawn onto an OffscreenCanvas (or a
  * DOM <canvas> if OffscreenCanvas is missing). Otherwise the file is loaded
  * through an <img> element via a temporary object URL.
  *
  * @param {Blob} file - image file picked by the user.
  * @returns {Promise<OffscreenCanvas|HTMLCanvasElement>} canvas holding the image.
  */
 async function fileToCanvasExif(file){
   // Draws `source` at the origin of `target` and returns the canvas.
   const paint = (target, source) => {
     target.getContext('2d').drawImage(source, 0, 0);
     return target;
   };
   // Creates a DOM <canvas> with the given pixel size.
   const domCanvas = (width, height) => {
     const c = document.createElement('canvas');
     c.width = width; c.height = height;
     return c;
   };

   if ('createImageBitmap' in window) {
     const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
     try {
       // Prefer OffscreenCanvas, fall back to <canvas>
       const target = ('OffscreenCanvas' in window)
         ? new OffscreenCanvas(bmp.width, bmp.height)
         : domCanvas(bmp.width, bmp.height);
       return paint(target, bmp);
     } finally {
       // Release the decoded bitmap right away instead of waiting for GC.
       bmp.close?.();
     }
   }

   // Classic path: decode through an <img> element.
   const url = URL.createObjectURL(file);
   try {
     const img = await new Promise((res, rej)=>{
       const im = new Image();
       im.onload = ()=>res(im);
       im.onerror = rej;
       im.src = url;
     });
     return paint(domCanvas(img.width, img.height), img);
   } finally {
     URL.revokeObjectURL(url);
   }
 }
 // ---- Load Transformers (once) ----
 // In-flight or completed load; reset to undefined when a load fails.
 let _load;
 /**
  * Imports the Transformers module, configures it for local models and loads
  * the processor and the CLIP vision model. Concurrent and later callers share
  * the same promise.
  *
  * A failed load is not cached: the next call starts a fresh attempt, so a
  * transient network error does not block the page until reload.
  *
  * @returns {Promise<{mod: object, processor: object, model: object}>}
  */
 async function ensureModel(){
   if (_load) return _load;
   const pending = (async()=>{
     const mod = await import(/* webpackIgnore: true */ transformersURL);
     // Local models only (same as the gadget)
     mod.env.allowLocalModels = true;
     mod.env.allowRemoteModels = false;
     mod.env.localModelPath = LOCAL_MODEL_PATH;
     // Optional: set env.backends.onnx.preferredBackend = 'webgpu' here to
     // prefer WebGPU; left unset on purpose.
     // WASM runtime path (ort-wasm-simd.wasm)
     mod.env.backends = mod.env.backends || {};
     mod.env.backends.onnx = mod.env.backends.onnx || {};
     mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
     // Derived from transformersURL so the runtime files always match the
     // module version that was imported.
     mod.env.backends.onnx.wasm.wasmPaths = `${transformersURL}/dist/`;
     const [processor, model] = await Promise.all([
       mod.AutoProcessor.from_pretrained(MODEL_ID),
       mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true })
     ]);
     return { mod, processor, model };
   })();
   _load = pending;
   // Forget a rejected attempt so the caller can retry; the rejection itself
   // still reaches whoever awaits `pending`.
   pending.catch(()=>{ if (_load === pending) _load = undefined; });
   return pending;
 }
 /**
  * Computes a unit-length image embedding for an image file.
  *
  * Pipeline: file → canvas → JPEG blob → RawImage → processor → model.forward.
  * The resulting vector is divided by its Euclidean length (a zero vector is
  * returned unchanged because the divisor falls back to 1).
  *
  * @param {Blob} file - image file to embed.
  * @returns {Promise<Float32Array>} new array holding the normalised embedding.
  * @throws {Error} 'Embedding-Format unerwartet' if the model output is not a Float32Array.
  */
 async function buildEmbeddingFromFile(file){
   const { mod: lib, processor: preprocess, model: visionModel } = await ensureModel();

   // Canvas as produced by fileToCanvasExif
   const surface = await fileToCanvasExif(file);

   // Canvas → Blob; OffscreenCanvas offers convertToBlob, <canvas> only toBlob
   let jpegBlob;
   if (surface.convertToBlob) {
     jpegBlob = await surface.convertToBlob({ type:'image/jpeg', quality:0.95 });
   } else {
     jpegBlob = await new Promise((resolve) => surface.toBlob(resolve, 'image/jpeg', 0.95));
   }

   // Blob → RawImage → model inputs → forward pass
   const rawImage = await lib.RawImage.fromBlob(jpegBlob);
   const features = await preprocess(rawImage, { return_tensors: 'pt' });
   const result = await visionModel.forward({ pixel_values: features.pixel_values });

   const embedding = result?.image_embeds?.data || result?.image_embeds;
   if (!(embedding instanceof Float32Array)) {
     throw new Error('Embedding-Format unerwartet');
   }

   // Normalise to unit length
   let sumSquares = 0;
   for (const component of embedding) sumSquares += component * component;
   const length = Math.sqrt(sumSquares) || 1;
   return Float32Array.from(embedding, (component) => component / length);
 }
 // ---- Load & save index ----
 /**
  * Fetches the current index page (action=raw) and parses it as a JSON array.
  *
  * @returns {Promise<Array>} the parsed entries. An empty array is returned
  *   when the page does not exist yet (HTTP 404), is empty, is not valid JSON,
  *   or does not contain an array.
  * @throws {Error} 'Index nicht ladbar: <status>' for any other non-OK response.
  */
 async function fetchIndexJSON(){
   const url = mw.util.getUrl(INDEX_TITLE, { action:'raw', ctype:'application/json' });
   const res = await fetch(url, { cache: 'no-store' });
   // A page that has not been created yet is not an error: start with an
   // empty index so the very first save can create it.
   if (res.status === 404) return [];
   if (!res.ok) throw new Error('Index nicht ladbar: '+res.status);
   const txt = await res.text();
   // Tolerate empty or broken content, but leave a trace in the console
   // because the next save will replace whatever is stored there.
   let arr;
   try {
     arr = JSON.parse(txt || '[]');
   } catch (err) {
     console.warn('LabelScan: index JSON could not be parsed, starting with an empty index.', err);
     return [];
   }
   if (!Array.isArray(arr)) {
     console.warn('LabelScan: index JSON is not an array, starting with an empty index.');
     return [];
   }
   return arr;
 }
 /**
  * Writes the index array to the index page through the MediaWiki edit API.
  *
  * The latest revision timestamp is sent as `basetimestamp` so the API can
  * detect concurrent edits. On an edit conflict the index is fetched again,
  * merged with `newArray` and written once more (without basetimestamp).
  *
  * @param {Array} newArray - complete index content to store.
  * @param {string} [summary] - edit summary; a default text is used if empty.
  * @returns {Promise<object>} the API response of the successful edit.
  * @throws whatever the API rejects with for errors other than an edit conflict.
  */
 async function saveIndexJSON(newArray, summary){
   await mw.loader.using(['mediawiki.api']);
   const api = new mw.Api();
   const editSummary = summary || 'LabelScan: +1 embedding (Auto-Indexer)';

   // Only the timestamp is needed for conflict protection; requesting the
   // content as well would transfer the whole index for nothing.
   const meta = await api.get({
     action: 'query',
     prop: 'revisions',
     titles: INDEX_TITLE,
     rvprop: 'timestamp',
     format: 'json'
   });
   const pages = meta?.query?.pages || {};
   const page = pages[Object.keys(pages)[0]];
   const baseTimestamp = page?.revisions?.[0]?.timestamp;

   // Builds the edit request. `nocreate` is deliberately absent: Action API
   // booleans are true whenever the parameter is present, so `nocreate: 0`
   // would have forbidden creating the page.
   const editParams = (entries, extra) => ({
     action: 'edit',
     title: INDEX_TITLE,
     text: JSON.stringify(entries, null, 2) + '\n',
     bot: 1,
     ...extra
   });

   // mw.Api rejects with the error code as first argument, which is all that
   // `await` passes on; object-shaped errors are accepted as well.
   const isEditConflict = (err) => {
     const code = (typeof err === 'string')
       ? err
       : (err?.code || err?.error?.code || err?.details || '');
     return String(code).includes('editconflict');
   };

   const firstAttempt = { summary: editSummary };
   if (baseTimestamp) firstAttempt.basetimestamp = baseTimestamp;
   try {
     return await api.postWithToken('csrf', editParams(newArray, firstAttempt));
   } catch (e) {
     if (!isEditConflict(e)) throw e;
     // Simple retry on conflict: reload, merge (deduplicated) and write again
     const fresh = await fetchIndexJSON();
     const merged = mergeArraysUnique(fresh, newArray);
     return api.postWithToken('csrf', editParams(merged, { summary: editSummary + ' (merge)' }));
   }
 }
 // Simple duplicate removal (identical title + embed)
 /**
  * Appends to `base` every entry of `add` whose title/embed pair is not
  * already present, then returns `base`.
  *
  * Note: `base` is modified in place. Entries already in `base` are never
  * removed, even if they duplicate each other.
  *
  * @param {Array<{title?: string, embed?: string}>} base - existing entries.
  * @param {Array<{title?: string, embed?: string}>} add - candidate entries.
  * @returns {Array} the same array instance as `base`.
  */
 function mergeArraysUnique(base, add){
   // Missing fields count as empty strings; these '' fallbacks had been
   // stripped from the source, leaving syntax errors.
   const keyOf = (entry) => (entry.title || '') + '|' + (entry.embed || '');
   const seen = new Set(base.map(keyOf));
   for (const it of add) {
     const key = keyOf(it);
     if (!seen.has(key)) { base.push(it); seen.add(key); }
   }
   return base;
 }
 // ---- Click handler ----
 // Runs the whole indexing flow for the selected file: permission check,
 // input validation, embedding, preview, then load → merge → save of the
 // index. Progress goes to status(); any failure is logged, shown in the
 // status line and reported via alert().
 $('idx-run').addEventListener('click', async ()=>{
   try{
     if (!hasSysop()) {
       alert('Du brauchst Admin-Rechte (sysop/interface-admin), um den Index automatisch zu speichern.');
       return;
     }
     const title = $('idx-title').value.trim();
     const thumb = $('idx-thumb').value.trim();
     const file  = $('idx-file').files?.[0];
     if (!title) return alert('Bitte Artikel-Titel eingeben.');
     if (!file)  return alert('Bitte Bilddatei wählen.');

     status('Modell laden …');
     await ensureModel();
     status('Embedding berechnen …');
     const vec = await buildEmbeddingFromFile(file);
     const b64 = float32ToBase64(vec);

     // Preview. The container is optional so that a page without
     // #idx-preview can still save the embedding.
     const preview = $('idx-preview');
     if (preview) {
       preview.textContent = '';
       const u = URL.createObjectURL(file);
       const img = document.createElement('img');
       // The object URL is only needed until the image is decoded (or fails).
       img.onload = img.onerror = () => URL.revokeObjectURL(u);
       img.src = u; img.style.maxWidth='280px'; img.style.borderRadius='10px';
       preview.appendChild(img);
     }

     // The '' fallback for an empty thumb had been stripped from the source.
     const newRow = { title, thumb: thumb || '', embed: b64 };
     $('idx-out').value = JSON.stringify(newRow);

     status('Index laden …');
     const arr = await fetchIndexJSON();
     const merged = mergeArraysUnique(arr, [newRow]);
     status('Speichern …');
     await saveIndexJSON(merged, `LabelScan: +1 embedding für "${title}"`);
     status('Gespeichert ✅');
   } catch(e){
     console.error(e);
     status('Fehler: ' + (e?.message || e));
     alert('Fehler beim Speichern:\n' + (e?.message || e));
   }
 });

})(); </script>