From 49f0871720dbecdb7f42964892c21e9cdf0ba115 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 9 May 2026 14:06:55 +0200 Subject: [PATCH] chore: ignore crawlee python build artifacts --- packages/crawlee-python/.gitignore | 4 ++ .../PKG-INFO | 57 ------------------ .../SOURCES.txt | 10 --- .../dependency_links.txt | 1 - .../entry_points.txt | 2 - .../requires.txt | 11 ---- .../top_level.txt | 1 - .../__pycache__/__init__.cpython-314.pyc | Bin 331 -> 0 bytes .../__pycache__/__main__.cpython-314.pyc | Bin 6236 -> 0 bytes 9 files changed, 4 insertions(+), 82 deletions(-) create mode 100644 packages/crawlee-python/.gitignore delete mode 100644 packages/crawlee-python/tip_crawlee_python_worker.egg-info/PKG-INFO delete mode 100644 packages/crawlee-python/tip_crawlee_python_worker.egg-info/SOURCES.txt delete mode 100644 packages/crawlee-python/tip_crawlee_python_worker.egg-info/dependency_links.txt delete mode 100644 packages/crawlee-python/tip_crawlee_python_worker.egg-info/entry_points.txt delete mode 100644 packages/crawlee-python/tip_crawlee_python_worker.egg-info/requires.txt delete mode 100644 packages/crawlee-python/tip_crawlee_python_worker.egg-info/top_level.txt delete mode 100644 packages/crawlee-python/tip_crawlee_worker/__pycache__/__init__.cpython-314.pyc delete mode 100644 packages/crawlee-python/tip_crawlee_worker/__pycache__/__main__.cpython-314.pyc diff --git a/packages/crawlee-python/.gitignore b/packages/crawlee-python/.gitignore new file mode 100644 index 0000000..5cf763a --- /dev/null +++ b/packages/crawlee-python/.gitignore @@ -0,0 +1,4 @@ +*.egg-info/ +__pycache__/ +*.py[cod] +.venv/ diff --git a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/PKG-INFO b/packages/crawlee-python/tip_crawlee_python_worker.egg-info/PKG-INFO deleted file mode 100644 index 43c3ec9..0000000 --- a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/PKG-INFO +++ /dev/null @@ -1,57 +0,0 @@ -Metadata-Version: 2.4 -Name: tip-crawlee-python-worker -Version: 0.1.0 -Summary: Optional Crawlee Python worker for TIP crawler nodes -Requires-Python: >=3.11 -Description-Content-Type: text/markdown -Requires-Dist: crawlee>=1.0.0 -Provides-Extra: beautifulsoup -Requires-Dist: crawlee[beautifulsoup]>=1.0.0; extra == "beautifulsoup" -Provides-Extra: playwright -Requires-Dist: crawlee[playwright]>=1.0.0; extra == "playwright" -Requires-Dist: playwright>=1.50.0; extra == "playwright" -Provides-Extra: all -Requires-Dist: crawlee[all]>=1.0.0; extra == "all" - -# TIP Crawlee Python Worker - -Optional Python crawler worker for Pi/Proxmox/residential nodes. - -The TypeScript scraper package remains the production crawler core. This package -exists for isolated worker experiments where Python extraction libraries are a -better fit. It writes JSONL artifacts; it does not write directly to TIP -PostgreSQL. - -## Install - -```bash -cd packages/crawlee-python -python3 -m venv .venv -. .venv/bin/activate -python -m pip install -U pip -python -m pip install -e ".[beautifulsoup]" -``` - -For browser-based Python workers: - -```bash -python -m pip install -e ".[playwright]" -python -m playwright install chromium -``` - -## Smoke Run - -```bash -python -m tip_crawlee_worker \ - --mode beautifulsoup \ - --url https://crawlee.dev \ - --out /tmp/tip-crawlee-python-smoke.jsonl \ - --max-requests 1 -``` - -## TIP Policy - -- Use this on Pi/Proxmox/residential nodes first, not as an Erik-heavy crawler. -- Keep output as JSONL evidence until a deterministic importer validates it. -- Record useful crawler outcomes in the TIPLLM training pool. -- Use TIPLLM only for planning/extraction feedback; no external AI. diff --git a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/SOURCES.txt b/packages/crawlee-python/tip_crawlee_python_worker.egg-info/SOURCES.txt deleted file mode 100644 index f3312c5..0000000 --- a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/SOURCES.txt +++ /dev/null @@ -1,10 +0,0 @@ -README.md -pyproject.toml -tip_crawlee_python_worker.egg-info/PKG-INFO -tip_crawlee_python_worker.egg-info/SOURCES.txt -tip_crawlee_python_worker.egg-info/dependency_links.txt -tip_crawlee_python_worker.egg-info/entry_points.txt -tip_crawlee_python_worker.egg-info/requires.txt -tip_crawlee_python_worker.egg-info/top_level.txt -tip_crawlee_worker/__init__.py -tip_crawlee_worker/__main__.py \ No newline at end of file diff --git a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/dependency_links.txt b/packages/crawlee-python/tip_crawlee_python_worker.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/entry_points.txt b/packages/crawlee-python/tip_crawlee_python_worker.egg-info/entry_points.txt deleted file mode 100644 index 3cabb91..0000000 --- a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -tip-crawlee-worker = tip_crawlee_worker.__main__:main diff --git a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/requires.txt b/packages/crawlee-python/tip_crawlee_python_worker.egg-info/requires.txt deleted file mode 100644 index f13c8b8..0000000 --- a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/requires.txt +++ /dev/null @@ -1,11 +0,0 @@ -crawlee>=1.0.0 - -[all] -crawlee[all]>=1.0.0 - -[beautifulsoup] -crawlee[beautifulsoup]>=1.0.0 - -[playwright] -crawlee[playwright]>=1.0.0 -playwright>=1.50.0 diff --git a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/top_level.txt b/packages/crawlee-python/tip_crawlee_python_worker.egg-info/top_level.txt deleted file mode 100644 index 1dabd13..0000000 --- a/packages/crawlee-python/tip_crawlee_python_worker.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -tip_crawlee_worker diff --git a/packages/crawlee-python/tip_crawlee_worker/__pycache__/__init__.cpython-314.pyc b/packages/crawlee-python/tip_crawlee_worker/__pycache__/__init__.cpython-314.pyc deleted file mode 100644 index 6e2015f96e3d78b32e7c7846251c48a67c1adaaa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 331 zcmXv~%SyvQ6rD-c7K>dexD0j`q?x)GH*w=eO3_e~;3t_? z!51>%8^Zkn^xn2L9HxvhL<)& z;WY>Om}#CBe1eu`J9h*9=Z-m{VLyVMQwAVP;Q(U2s5{p)l~)pPOvl@4van4K-z`e% UCy6%Q^Eb9?Z+d6zt~v6HU$QD=e*gdg diff --git a/packages/crawlee-python/tip_crawlee_worker/__pycache__/__main__.cpython-314.pyc b/packages/crawlee-python/tip_crawlee_worker/__pycache__/__main__.cpython-314.pyc deleted file mode 100644 index 41be146b197b96d07cc8b6f83176e473af971e4b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6236 zcmbtYZ)_CD6`#G`+uQqpHa2JD10S}59&k3;5Kw}WU=#ldj&avuMTz6na<{fOKHptt z_X=1jF-a6`rK(L63FlMz(#nxiDU~Xf_ESPqDgD$ladXE8q(rGy@y&!#O1`x3&HeFV zOibFbZ|3dWH?wbMe(%rBE|1$mp!~MsKiY4A`W7FG#gZqu+kXaPiVPE(>ml8o!PO&e z>9H6V*763=TEP&Y<$J8%q9HOFp~u!O84{zdJ@#&g;b64b?PGN1OB>{hgU=HZ1Kk7UrOK^jUqO2q*V4#- zp~{=c3vs4XNhFenVra>P-UrWiLz_@zYTQuZLFiYEF=+Y2i8CRKS=Fv8X+s-H#|M(> zR0mZ~#Z_ts9^`6?(T-%oP){3C4nBiFEVy_)Jl+lfF+~Q+u;t*JpeXPH-Dy<;9UColp!NKhR*l+Hyiysop}> zggT-{$Bc=z8V3zqj;Q*$kxaF8#FcbR4R$1BYRjl*jHOS8shUdaEe2H*dQ{bZq0(^d zWDATxu8gXBOO#Df4X4f+W64B|p`{`PR^(KYj>FKA$b_OLB9Z3Q8PgSs6c$sBM5qsR za>Gxrf@+G~B@$PE3#&}kcGlhoK8a#gEH)t6`p7V$DKb>#ll}tiN(>;j5*fgEps=24QA5gVnvf+47e=wOsv@7vo9>Hp`pKWh2zoh^x zXH3MLoDsxea-Q{yQdB$+lQE1w-W*4G;Z zGL<%hDX_!lkcHO5yyzyVCOx2*avJdaN?O$o9rO{5GbxpB#FrJ}Fq$!R4(n<(nTY9= zws1JEoK)kJm4M>#{lE@!tXs%xN<~96J%{ONdO}SY{b+a8bSbe|1l}^9$!iJ2bTE@d zrjEAAo+z`$AoZf6!D7YiU;&<{$aQyh*4=c~-IUpTV#z%?)tM67!#`!2LI8SJQq2ejEJh7nW1 zy`yNF=+2@^8k_XQw5ai-ZqVI9aI7y;CzI<-CRB50)ChyhqD~Zm8d6XJ_u^lS( zC_HzZyT$6c*fMJ`_2t`-j66R-k8M@2OF#O#Rb@yjGWt=&Rb?v~uWT}w^$u6>^AUz721&C&;C!4_&;vY&p!EYuS7qTBmvj!Xk>~&L8PiwjnNsgPG zGHFd4BjJMpb4g7^lQ9TzognK{6MCAe5k-$`nrYXQ)QF6$XLOTu^o2xb5-^l$ofwa4 z)D)877EJ3Ysu`*&ysU%qTVv^olpf+~Bj`sFTBbOrBw}$@$0=;zp0^~^$vQ=_Pn_4g)uV1(%estpF*i!Y;&treDmY1Y~yF7911X1|EY7hOOBQF26=Wv_#lQw6& zV872E1b7E3a<_v_iHF-m3#1$&Yrz)KS5O!9V3?=dkXjG&TR=#!V_&%owh_FARiCHVn{eVpUm*>F^%?z^Wo!iZ+nq2DyIp4690it)Lme0H%z~F|Tia{I1 zL(EJmdYngtp}B*m6OsGkfAgVuGOEP&C!5QYzMU{+395Bg$1MU?W=F>|wx0{^@j<9! zta%5gVZeeOvgCpK5XW5Z%Ap&U5)@A$6d%1`Kbr8WQW~q}{~zneOyW6CCIH{HgHOp= zR+MWda^*3u2tFSN!rp05M?2yWK=7C@Gv3s#Zc5|ISM}0PPRa=6d1v7%%Tb< z#Xq_xvb9VLp`b+=v=9b`espE4iYyl8i)%XyyBr`#;S98156~I*p`f#{N@wd&(Aieh z0ai{d(^(u8*VI`m%CDjGIz8^wS(NSft+Qn>?NftcGU9 zRZ|*M6-*s;)2B^z+qa(_QJ0fer+MADd4e#K8JdUsAXc5 zcHo0F0%}B*c-)jmRU?9~&E$b%X#zcpBDR8)rM>VJa?q!s$&X|!Xhi#v^$ZG&Of}sU zP$8HF)H*$YZ+JLQ=$M^t59Z6%6w6Lc?a3-p+64& zeki-Y3;r|vyFTywTkl_cv%SNMy~EjF75+25>YVMO>w+umYgqI(WPPDUUnuKqS@g9m z`SxbUlhf8^kpvpwKlI+AY~7y4x;G;OqHpl0@ zm9wX2PR*rfU;W-n+;!6(vt2V?bI<&$@0!2u7K$xPIF?#fJZHcua!?VjnL8~oB0xK`5&qgAlcyf9jY zZ`%63gV?KbF7MaAjhULgIiG*N>itdcZOZwo=T2RG?ZRtW|IS7KPUKhTe4FOGbH0X4 zojKpuOZ%6-lH2nytJChd_uXcg@4fFN(DdGS4*0@7{Ylu2Hx64m4|1P~hrQ5TsSb38 zge!Y@0R3rAZD+Ia>0{MUf96EaXRWoJZNguC)mU#kya8B!As0LA&|yT3$WmzC6w?XB z5w!u6V%Ragg*YE6L+!d&}4;Yza& z==;(g^lA`0*1tacfM@CUqa`=&SotgrtwSmWOM%BqxS)b+zKJ#YgazJ}3Mi4!ttk+z z^XgJwiK)mM>55F)8(yck+6w