crawl: add equivalence auto research learning record
This commit is contained in:
parent
850083f3d4
commit
4343cd2f7d
@ -1,2 +1,3 @@
|
|||||||
{"id":"robot-exp-f3015c7c20d8","source":"robot-experience:status:codex-verification-robot","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIPLLM, an expert AI assistant for the Transceiver Intelligence Platform (TIP). You have deep knowledge of optical transceiver specifications, form factors, IEEE standards, vendor product catalogs, and fiber optic networking. You help engineers select, source, and validate transceivers. You provide precise, structured answers with confidence scores and always cite your reasoning."},{"role":"user","content":"You are TIPLLM controlling TIP crawler and verification robots.\nGiven this current observation, choose the safest next robot action.\n\n{\n \"event\": \"status\",\n \"actor\": \"codex-verification-robot\",\n \"input\": {\n \"live_total\": 13546,\n \"price_verified\": 7250,\n \"image_verified\": 7025,\n \"details_verified\": 6243,\n \"fully_verified\": 5812,\n \"requested_policy\": \"TIPLLM only; no other AI; record experiences in Gitea training pool\"\n },\n \"safety_notes\": [\n \"No crawler jobs were started.\",\n \"No non-TIPLLM AI is added to the runtime design.\",\n \"This record is intended for TIPLLM SFT training.\"\n ]\n}"},{"role":"assistant","content":"{\n \"summary\": \"TIP verification robot policy updated: use TIPLLM only for crawler planning and write robot experience back to the TIPLLM training pool.\",\n \"decision\": {\n \"ai_policy\": \"TIPLLM_ONLY\",\n \"erik_policy\": \"controller_only_no_heavy_crawlers\",\n \"training_pool\": \"write robot status, TIPLLM plans, queue plans, enqueue outcomes and crawler results as SFT records\",\n \"deployment_policy\": \"Proxmox/Pi for heavy crawler work; Erik only safe queues and DB/pg-boss coordination\"\n },\n \"outcome\": {\n \"code_paths\": [\n \"packages/scraper/src/crawler-llm/training-data-writer.ts\",\n \"packages/scraper/src/robots/verification-robots.ts\",\n \"scripts/tip-learning-pool-build.ts\"\n ],\n \"crawl_jobs_started\": 0\n },\n \"constraints\": [\n \"Use TIPLLM only for planning/extraction feedback.\",\n \"Do not start heavy crawler waves on Erik.\",\n \"Prefer small, observable queue batches.\",\n \"Write useful outcomes back into the TIPLLM training pool.\"\n ]\n}"}]}
|
{"id":"robot-exp-f3015c7c20d8","source":"robot-experience:status:codex-verification-robot","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIPLLM, an expert AI assistant for the Transceiver Intelligence Platform (TIP). You have deep knowledge of optical transceiver specifications, form factors, IEEE standards, vendor product catalogs, and fiber optic networking. You help engineers select, source, and validate transceivers. You provide precise, structured answers with confidence scores and always cite your reasoning."},{"role":"user","content":"You are TIPLLM controlling TIP crawler and verification robots.\nGiven this current observation, choose the safest next robot action.\n\n{\n \"event\": \"status\",\n \"actor\": \"codex-verification-robot\",\n \"input\": {\n \"live_total\": 13546,\n \"price_verified\": 7250,\n \"image_verified\": 7025,\n \"details_verified\": 6243,\n \"fully_verified\": 5812,\n \"requested_policy\": \"TIPLLM only; no other AI; record experiences in Gitea training pool\"\n },\n \"safety_notes\": [\n \"No crawler jobs were started.\",\n \"No non-TIPLLM AI is added to the runtime design.\",\n \"This record is intended for TIPLLM SFT training.\"\n ]\n}"},{"role":"assistant","content":"{\n \"summary\": \"TIP verification robot policy updated: use TIPLLM only for crawler planning and write robot experience back to the TIPLLM training pool.\",\n \"decision\": {\n \"ai_policy\": \"TIPLLM_ONLY\",\n \"erik_policy\": \"controller_only_no_heavy_crawlers\",\n \"training_pool\": \"write robot status, TIPLLM plans, queue plans, enqueue outcomes and crawler results as SFT records\",\n \"deployment_policy\": \"Proxmox/Pi for heavy crawler work; Erik only safe queues and DB/pg-boss coordination\"\n },\n \"outcome\": {\n \"code_paths\": [\n \"packages/scraper/src/crawler-llm/training-data-writer.ts\",\n \"packages/scraper/src/robots/verification-robots.ts\",\n \"scripts/tip-learning-pool-build.ts\"\n ],\n \"crawl_jobs_started\": 0\n },\n \"constraints\": [\n \"Use TIPLLM only for planning/extraction feedback.\",\n \"Do not start heavy crawler waves on Erik.\",\n \"Prefer small, observable queue batches.\",\n \"Write useful outcomes back into the TIPLLM training pool.\"\n ]\n}"}]}
|
||||||
{"id":"robot-exp-20260509-fs-flex-price-image","source":"robot-experience:crawler_result:codex-fs-flex-price-image-revalidation","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIPLLM, an expert AI assistant for the Transceiver Intelligence Platform (TIP). You have deep knowledge of optical transceiver specifications, form factors, IEEE standards, vendor product catalogs, and fiber optic networking. You help engineers select, source, and validate transceivers. You provide precise, structured answers with confidence scores and always cite your reasoning."},{"role":"user","content":"You are TIPLLM controlling TIP crawler and verification robots.\nGiven this current observation, choose the safest next robot action.\n\n{\n \"event\": \"crawler_result\",\n \"actor\": \"codex-fs-flex-price-image-revalidation\",\n \"profile\": \"erik-safe\",\n \"vendor\": \"Flexoptix+FS.COM\",\n \"input\": {\n \"problem\": \"Flexoptix and FS.com prices looked wrong; price_observations were not marked verified; FS.com images did not refresh.\",\n \"precheck\": {\n \"Flexoptix_verified_price_observations\": 0,\n \"FS_COM_verified_price_observations\": 0,\n \"FS_COM_image_verified_products\": 207\n }\n },\n \"safety_notes\": [\n \"Use TIPLLM only for TIP crawler planning and extraction feedback.\",\n \"Keep Erik safe: sequential runs, maxConcurrency=1, nice -n 10, monitor load.\",\n \"Prefer targeted reruns after selector fixes instead of repeating full heavy crawls.\"\n ]\n}"},{"role":"assistant","content":"{\n \"summary\": \"For Flexoptix and FS.com price/image revalidation, first fix the DB verification write path, then crawl sequentially and verify counters from Postgres before declaring success.\",\n \"decision\": {\n \"price_observation_policy\": \"Set price_observations.is_verified=true and verified_at for both new observations and fresh unchanged observations, while also refreshing transceiver.price_verified_at.\",\n \"image_policy\": \"When a scraper sees a current product image, refresh image_verified_at and image_verified_url even if image_url already exists; replace only blank/default/placeholder image_url values.\",\n \"fs_image_selector_policy\": \"Prefer resource.fs.com product images from .big_img_box, img.big_img, .big_img_m_active, .big_img_m, and .small_img_active; reject default.jpg, generalImg, logos, icons, and SVGs.\",\n \"crawl_policy\": \"Use force revalidate for prices, then targeted FS_ONLY_MISSING_IMAGES for missing images to avoid another full crawl.\"\n },\n \"outcome\": {\n \"Flexoptix\": {\n \"products_processed\": 615,\n \"verified_price_observations_after\": 615,\n \"images_verified_last_4h\": 605\n },\n \"FS.COM\": {\n \"full_detail_pages\": 270,\n \"targeted_image_detail_pages\": 99,\n \"failed_requests\": 0,\n \"verified_price_observations_after\": 271,\n \"image_verified_products_after\": 299\n },\n \"daemon\": \"tip-scraper-daemon restarted and online\"\n },\n \"constraints\": [\n \"Use TIPLLM only for planning/extraction feedback.\",\n \"Do not start heavy crawler waves on Erik.\",\n \"Prefer small, observable queue batches.\",\n \"Write useful outcomes back into the TIPLLM training pool.\"\n ]\n}"}]}
|
{"id":"robot-exp-20260509-fs-flex-price-image","source":"robot-experience:crawler_result:codex-fs-flex-price-image-revalidation","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIPLLM, an expert AI assistant for the Transceiver Intelligence Platform (TIP). You have deep knowledge of optical transceiver specifications, form factors, IEEE standards, vendor product catalogs, and fiber optic networking. You help engineers select, source, and validate transceivers. You provide precise, structured answers with confidence scores and always cite your reasoning."},{"role":"user","content":"You are TIPLLM controlling TIP crawler and verification robots.\nGiven this current observation, choose the safest next robot action.\n\n{\n \"event\": \"crawler_result\",\n \"actor\": \"codex-fs-flex-price-image-revalidation\",\n \"profile\": \"erik-safe\",\n \"vendor\": \"Flexoptix+FS.COM\",\n \"input\": {\n \"problem\": \"Flexoptix and FS.com prices looked wrong; price_observations were not marked verified; FS.com images did not refresh.\",\n \"precheck\": {\n \"Flexoptix_verified_price_observations\": 0,\n \"FS_COM_verified_price_observations\": 0,\n \"FS_COM_image_verified_products\": 207\n }\n },\n \"safety_notes\": [\n \"Use TIPLLM only for TIP crawler planning and extraction feedback.\",\n \"Keep Erik safe: sequential runs, maxConcurrency=1, nice -n 10, monitor load.\",\n \"Prefer targeted reruns after selector fixes instead of repeating full heavy crawls.\"\n ]\n}"},{"role":"assistant","content":"{\n \"summary\": \"For Flexoptix and FS.com price/image revalidation, first fix the DB verification write path, then crawl sequentially and verify counters from Postgres before declaring success.\",\n \"decision\": {\n \"price_observation_policy\": \"Set price_observations.is_verified=true and verified_at for both new observations and fresh unchanged observations, while also refreshing transceiver.price_verified_at.\",\n \"image_policy\": \"When a scraper sees a current product image, refresh image_verified_at and image_verified_url even if image_url already exists; replace only blank/default/placeholder image_url values.\",\n \"fs_image_selector_policy\": \"Prefer resource.fs.com product images from .big_img_box, img.big_img, .big_img_m_active, .big_img_m, and .small_img_active; reject default.jpg, generalImg, logos, icons, and SVGs.\",\n \"crawl_policy\": \"Use force revalidate for prices, then targeted FS_ONLY_MISSING_IMAGES for missing images to avoid another full crawl.\"\n },\n \"outcome\": {\n \"Flexoptix\": {\n \"products_processed\": 615,\n \"verified_price_observations_after\": 615,\n \"images_verified_last_4h\": 605\n },\n \"FS.COM\": {\n \"full_detail_pages\": 270,\n \"targeted_image_detail_pages\": 99,\n \"failed_requests\": 0,\n \"verified_price_observations_after\": 271,\n \"image_verified_products_after\": 299\n },\n \"daemon\": \"tip-scraper-daemon restarted and online\"\n },\n \"constraints\": [\n \"Use TIPLLM only for planning/extraction feedback.\",\n \"Do not start heavy crawler waves on Erik.\",\n \"Prefer small, observable queue batches.\",\n \"Write useful outcomes back into the TIPLLM training pool.\"\n ]\n}"}]}
|
||||||
|
{"id":"robot-exp-20260509-equivalence-auto-research","source":"robot-experience:equivalence_research_result:codex-equivalence-auto-research","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIPLLM, an expert AI assistant for the Transceiver Intelligence Platform (TIP). You have deep knowledge of optical transceiver specifications, form factors, IEEE standards, vendor product catalogs, and fiber optic networking. You help engineers select, source, and validate transceivers. You provide precise, structured answers with confidence scores and always cite your reasoning."},{"role":"user","content":"You are TIPLLM controlling TIP crawler and verification robots.\nGiven this current observation, choose the safest next robot action.\n\n{\n \"event\": \"equivalence_research_result\",\n \"actor\": \"codex-equivalence-auto-research\",\n \"profile\": \"erik-safe-db-only\",\n \"input\": {\n \"problem\": \"The manual review queue contained many approved re-research matches that should not require manual validation.\",\n \"precheck\": {\n \"pending\": 401,\n \"approved\": 149688,\n \"auto_approved\": 32340,\n \"rejected\": 4,\n \"due_research\": 143702\n }\n },\n \"safety_notes\": [\n \"Use TIPLLM only for TIP crawler planning and extraction feedback.\",\n \"Do not start heavy crawler waves on Erik for backlog cleanup.\",\n \"Use deterministic product evidence first: recent price, form factor, speed, fiber, wavelength, reach, confidence.\"\n ]\n}"},{"role":"assistant","content":"{\n \"summary\": \"For a large manual equivalence backlog, first convert weak approvals into deterministic automated research decisions using existing crawled evidence; only schedule crawler enrichment for missing-evidence products after the queue is clean.\",\n \"decision\": {\n \"approval_policy\": \"Approve or keep approved only when recent price, form factor, speed, fiber type, wavelength, reach, and confidence threshold all pass.\",\n \"rejection_policy\": \"Reject stale, missing-evidence, technically contradictory, or low-confidence matches automatically instead of sending them back to manual review.\",\n \"api_policy\": \"Expose a run-research endpoint and prevent approve-all from approving low-confidence candidates.\",\n \"crawler_policy\": \"Do not crawl during backlog cleanup; use Proxmox/Pi workers later for targeted enrichment of products rejected due to missing reach/details.\"\n },\n \"outcome\": {\n \"pending\": 0,\n \"approved\": 1986,\n \"auto_approved\": 32080,\n \"rejected\": 148367,\n \"due_research\": 0,\n \"scheduled_30_day_rechecks\": 34066\n },\n \"constraints\": [\n \"Use TIPLLM only for planning/extraction feedback.\",\n \"Keep Erik light: DB-only cleanup or small queues, no uncontrolled crawler waves.\",\n \"Write useful robot outcomes back into the TIPLLM training pool.\",\n \"Prefer conservative rejection over falsely verified equivalence.\"\n ]\n}"}]}
|
||||||
|
|||||||
@ -1 +1,2 @@
|
|||||||
{"event":"crawler_result","observed_at":"2026-05-09T03:11:07Z","actor":"codex-fs-flex-price-image-revalidation","profile":"erik-safe","wave":"manual-targeted-revalidation","vendor":"Flexoptix+FS.COM","summary":"Revalidated Flexoptix and FS.com price observations and product images after discovering price_observations were not marked verified and FS.com image selectors had gone stale.","input":{"user_request":"Revalidate all Flexoptix and FS.com prices and check images without overloading Erik.","precheck":{"Flexoptix":{"products":744,"price_verified_products":619,"image_verified_products":615,"price_observations":1288,"verified_price_observations":0},"FS.COM":{"products":383,"price_verified_products":379,"image_verified_products":207,"price_observations":796,"verified_price_observations":0}}},"decision":{"code_changes":["Set price_observations.is_verified=true and verified_at on new and fresh unchanged observations.","Always refresh image_verified_at/image_verified_url when a scraper sees a current product image.","Add FS.com force revalidate controls and targeted missing-image mode.","Update FS.com image selector to prefer resource.fs.com big_img/big_img_m product images and ignore logos/default/general images."],"runtime_policy":"Run sequentially with maxConcurrency=1 and nice -n 10 on Erik because CT115 was not reachable from this session; monitor load and Crawlee overload signals."},"outcome":{"Flexoptix":{"products_processed":615,"verified_price_observations_after":615,"images_verified_last_4h":605,"image_verified_products_after":615},"FS.COM":{"products_discovered":270,"full_detail_pages":270,"targeted_image_detail_pages":99,"failed_requests":0,"verified_price_observations_after":271,"new_price_observations":22,"image_verified_products_before":207,"image_verified_products_after":299,"images_verified_last_4h":99},"daemon":"tip-scraper-daemon restarted and online"},"safety_notes":["No external AI was used for TIP crawler planning or extraction feedback.","FS.com and Flexoptix runs were sequential and rate limited; Erik remained stable.","The dedicated TIP scraper CT was probed but not used because SSH did not respond quickly."]}
|
{"event":"crawler_result","observed_at":"2026-05-09T03:11:07Z","actor":"codex-fs-flex-price-image-revalidation","profile":"erik-safe","wave":"manual-targeted-revalidation","vendor":"Flexoptix+FS.COM","summary":"Revalidated Flexoptix and FS.com price observations and product images after discovering price_observations were not marked verified and FS.com image selectors had gone stale.","input":{"user_request":"Revalidate all Flexoptix and FS.com prices and check images without overloading Erik.","precheck":{"Flexoptix":{"products":744,"price_verified_products":619,"image_verified_products":615,"price_observations":1288,"verified_price_observations":0},"FS.COM":{"products":383,"price_verified_products":379,"image_verified_products":207,"price_observations":796,"verified_price_observations":0}}},"decision":{"code_changes":["Set price_observations.is_verified=true and verified_at on new and fresh unchanged observations.","Always refresh image_verified_at/image_verified_url when a scraper sees a current product image.","Add FS.com force revalidate controls and targeted missing-image mode.","Update FS.com image selector to prefer resource.fs.com big_img/big_img_m product images and ignore logos/default/general images."],"runtime_policy":"Run sequentially with maxConcurrency=1 and nice -n 10 on Erik because CT115 was not reachable from this session; monitor load and Crawlee overload signals."},"outcome":{"Flexoptix":{"products_processed":615,"verified_price_observations_after":615,"images_verified_last_4h":605,"image_verified_products_after":615},"FS.COM":{"products_discovered":270,"full_detail_pages":270,"targeted_image_detail_pages":99,"failed_requests":0,"verified_price_observations_after":271,"new_price_observations":22,"image_verified_products_before":207,"image_verified_products_after":299,"images_verified_last_4h":99},"daemon":"tip-scraper-daemon restarted and online"},"safety_notes":["No external AI was used for TIP crawler planning or extraction feedback.","FS.com and Flexoptix runs were sequential and rate limited; Erik remained stable.","The dedicated TIP scraper CT was probed but not used because SSH did not respond quickly."]}
|
||||||
|
{"event":"equivalence_research_result","observed_at":"2026-05-09T05:45:00Z","actor":"codex-equivalence-auto-research","profile":"erik-safe-db-only","wave":"manual-queue-cleanup","vendor":"all-equivalence-candidates","summary":"Converted the manual equivalence review backlog into deterministic automated research decisions using existing crawled specs and prices. No heavy crawler wave was started.","input":{"user_request":"Research all TIP product equivalences so they do not require manual validation.","precheck":{"pending":401,"approved":149688,"auto_approved":32340,"rejected":4,"due_research":143702}},"decision":{"code_changes":["Do not let approve-all approve low-confidence matches; queue them for automated research.","Add API endpoint to trigger equivalence research.","Replace price-only re-research with strict technical evaluator for price recency, form factor, speed, fiber type, wavelength, reach and confidence."],"runtime_policy":"Use existing DB evidence only for cleanup. Keep Erik safe by avoiding crawler waves; schedule confirmed matches for future 30-day rechecks."},"outcome":{"pending":0,"approved":1986,"auto_approved":32080,"rejected":148367,"due_research":0,"scheduled_rechecks":34066,"processed":{"pending_and_due":144103,"old_approved":5986,"old_auto_approved":32340},"rejection_summary":{"fiber_mismatch":1142,"reach_mismatch":84092,"reach_missing":32268,"wavelength_mismatch":30858,"low_confidence":3},"verification_counters":{"competitor_verified":11137,"fully_verified":290,"price_verified":11549,"image_verified":10629,"details_verified":9538}},"safety_notes":["No external AI was used for TIP crawler planning or extraction feedback.","No new crawler wave was started on Erik.","Next enrichment should target rejected missing-evidence products on Proxmox or Pi workers with TIPLLM-only extraction feedback."]}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user