first commit

This commit is contained in:
Oracle Public Cloud User
2025-09-02 23:15:17 +00:00
commit b51b2f5e1e
30 changed files with 598 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
from itertools import combinations

from rapidfuzz import fuzz
def _sim(a: str, b: str) -> float:
if not a or not b: return 0.0
return fuzz.token_set_ratio(a, b) / 100.0
def _pairs(n: int):
for i in range(n):
for j in range(i+1, n):
yield i, j
def dedupe_candidates(rows, threshold=0.87):
    """Return index pairs of rows that look like duplicates of each other.

    Every unordered pair of rows is scored as the mean of three parts:

    * similarity of the ``"name"`` fields,
    * the better of the ``"email"`` and ``"phone"`` similarities,
    * similarity of the ``"address"`` fields.

    Each row must support ``.get(key, default)``; a missing key is read
    as ``""``.

    Args:
        rows: Indexable sequence of mapping-like records.
        threshold: Minimum mean score (inclusive) for a pair to be
            reported.

    Returns:
        A list of ``{"i": i, "j": j, "score": s}`` dicts with ``i < j``,
        where ``s`` is the mean score rounded to three decimals.

    NOTE(review): the sum is always divided by 3, so a field that is
    empty on either side contributes 0 and lowers the pair's score --
    confirm this is the intended treatment of missing data.
    """
    matches = []
    for left_idx, right_idx in _pairs(len(rows)):
        left, right = rows[left_idx], rows[right_idx]

        name_score = _sim(left.get("name", ""), right.get("name", ""))
        email_score = _sim(left.get("email", ""), right.get("email", ""))
        phone_score = _sim(left.get("phone", ""), right.get("phone", ""))
        address_score = _sim(left.get("address", ""), right.get("address", ""))

        # Email and phone are alternative contact signals: keep the stronger.
        contact_score = max(email_score, phone_score)
        score = (name_score + contact_score + address_score) / 3.0

        if score >= threshold:
            matches.append(
                {"i": left_idx, "j": right_idx, "score": round(score, 3)}
            )
    return matches