ReneZ > 24-11-2025, 11:41 AM
rikforto > 24-11-2025, 01:53 PM
(24-11-2025, 11:08 AM)MarcoP Wrote: You are not allowed to view links. Register or Login to view.(24-11-2025, 10:58 AM)Koen G Wrote: You are not allowed to view links. Register or Login to view.Although with Latin, we are much closer to mimicking what actual "solutions" feel like, with obscurity of the language being a prerequisite.
Yes, that's probably why English is not popular among "solvers"
bi3mw > 24-11-2025, 02:20 PM
Bluetoes101 > 24-11-2025, 02:52 PM
bi3mw > 24-11-2025, 05:16 PM

#!/usr/bin/env python3
# coding: utf-8
"""
DoubleMetaphone + Levenshtein Hybrid Decoder (with MAX_RATIO-threshold)
-----------------------------------------------------------------------
1. Load Latin words from wordlist.txt
2. Build a DoubleMetaphone index
3. For each Voynich word:
- Compute its DoubleMetaphone codes
- Find matching or phonetically similar Latin words
- Rank candidates using Levenshtein distance
- Output the best candidate or <no match>
"""
import sys
from metaphone import doublemetaphone
from collections import defaultdict
from tqdm import tqdm
# ---------------------------------------------------------
# Parameter: maximum allowed relative distance
# ---------------------------------------------------------
# Largest accepted edit distance, as a fraction of the Voynich word length
# (0.35 = 35%). hybrid_match() truncates the product to an int and never
# lets the limit drop below one edit.
MAX_RATIO = 0.35
# ---------------------------------------------------------
# Levenshtein distance
# ---------------------------------------------------------
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between *a* and *b*.

    Insertions, deletions and substitutions each cost 1. Only two rows of
    the dynamic-programming table are kept in memory at any time.
    """
    # An empty side means the distance is simply the length of the other.
    if not (a and b):
        return len(b) if not a else len(a)

    previous = list(range(len(b) + 1))
    for row, char_a in enumerate(a, start=1):
        current = [row]
        for col, char_b in enumerate(b, start=1):
            if char_a == char_b:
                # Matching characters carry the diagonal value over unchanged.
                cost = previous[col - 1]
            else:
                # Cheapest of substitution, deletion and insertion, plus one.
                cost = 1 + min(previous[col - 1], previous[col], current[col - 1])
            current.append(cost)
        previous = current
    return previous[-1]
# ---------------------------------------------------------
# Load Latin words
# ---------------------------------------------------------
def load_wordlist(path="wordlist.txt"):
    """Read a UTF-8 word list, one entry per line.

    Each line is stripped of surrounding whitespace and lower-cased; lines
    that end up empty are skipped. File order (and duplicates) are kept.
    """
    with open(path, "r", encoding="utf8") as handle:
        cleaned = (raw.strip().lower() for raw in handle)
        return [entry for entry in cleaned if entry]
# ---------------------------------------------------------
# Normalize a Voynich word
# ---------------------------------------------------------
def normalize_voynich_word(w):
    """Keep only the alphabetic characters of *w*, each one lower-cased."""
    letters = filter(str.isalpha, w)
    # Lower-case character by character (not the joined string) so that
    # context-sensitive case rules cannot change the result.
    return "".join(map(str.lower, letters))
# ---------------------------------------------------------
# Build Metaphone index
# ---------------------------------------------------------
def build_metaphone_index(words):
    """Group *words* by their DoubleMetaphone codes.

    Returns a ``defaultdict(list)`` mapping each non-empty code to the words
    that produce it. A word is filed under its primary code and, when it
    differs, also under its secondary code.
    """
    index = defaultdict(list)
    for word in words:
        primary, secondary = doublemetaphone(word)
        codes = [primary] if primary else []
        if secondary and secondary != primary:
            codes.append(secondary)
        for code in codes:
            index[code].append(word)
    return index
# ---------------------------------------------------------
# Combined DoubleMetaphone + Levenshtein matching
# ---------------------------------------------------------
def hybrid_match(v_word, index, top_n=1):
    """Find the Latin word(s) that best match a normalized Voynich word.

    Candidates are the words filed under the DoubleMetaphone codes of
    *v_word*; when fewer than five are found, every word whose code shares
    the first two characters of the primary code is added as well. Each
    candidate is scored by the Levenshtein distance between *v_word* and the
    candidate truncated to ``len(v_word)`` characters.

    Args:
        v_word: Normalized (lower-case, letters only) Voynich word.
        index: Mapping of metaphone code -> list of words, as produced by
            build_metaphone_index().
        top_n: Maximum number of equally good candidates to return.

    Returns:
        Up to *top_n* distinct words sharing the lowest distance, in
        candidate order, or ``["<no match>"]`` when *v_word* is empty, no
        candidate exists, or the lowest distance exceeds the threshold
        ``max(1, int(len(v_word) * MAX_RATIO))``.
    """
    if not v_word:
        return ["<no match>"]

    m1, m2 = doublemetaphone(v_word)
    candidates = []

    # 1) Exact metaphone matches
    if m1 in index:
        candidates.extend(index[m1])
    if m2 in index:
        candidates.extend(index[m2])

    # 2) If too few candidates, expand by prefix similarity
    if len(candidates) < 5 and m1:
        prefix = m1[:2]
        for key in index:
            if key.startswith(prefix):
                candidates.extend(index[key])

    if not candidates:
        return ["<no match>"]

    # The prefix scan revisits the exact-match keys, so the same word can be
    # collected several times. Drop repeats (keeping first-seen order) so
    # that top_n > 1 never returns the same word twice.
    candidates = list(dict.fromkeys(candidates))

    # 3) Compute Levenshtein distances (compare only an equal-length prefix)
    word_len = len(v_word)
    scored = [(levenshtein(v_word, cand[:word_len]), cand) for cand in candidates]
    best_score = min(score for score, _ in scored)

    # 4) Check acceptance threshold (at least one edit is always tolerated)
    max_allowed = max(1, int(word_len * MAX_RATIO))
    if best_score > max_allowed:
        return ["<no match>"]

    # 5) All candidates with the same best distance, in candidate order
    best_candidates = [cand for score, cand in scored if score == best_score]
    return best_candidates[:top_n]
# ---------------------------------------------------------
# MAIN
# ---------------------------------------------------------
def main():
    """Command-line entry point: decode an input file line by line.

    Expects two arguments (input path, output path). Loads ``wordlist.txt``
    from the current directory, decodes every whitespace-separated token of
    every input line with hybrid_match(), and writes one output line per
    input line (blank lines stay blank).
    """
    if len(sys.argv) < 3:
        print("Usage: python3 phonetic_levenshtein.py voyn2latin.txt output.txt")
        sys.exit(1)
    infile, outfile = sys.argv[1], sys.argv[2]

    print("Loading Latin wordlist …")
    latin_words = load_wordlist("wordlist.txt")
    print(f"{len(latin_words)} Latin words loaded.")

    print("Building DoubleMetaphone index …")
    index = build_metaphone_index(latin_words)
    print(f"Index contains {len(index)} metaphone keys.")

    print("Decoding …")
    with open(infile, "r", encoding="utf8") as source:
        lines = source.readlines()

    output_lines = []
    for line in tqdm(lines, desc="Lines"):
        if line.strip():
            decoded = [
                hybrid_match(normalize_voynich_word(token), index, top_n=1)[0]
                for token in line.split()
            ]
            output_lines.append(" ".join(decoded))
        else:
            # Preserve blank lines so the output stays aligned with the input.
            output_lines.append("")

    with open(outfile, "w", encoding="utf8") as target:
        target.writelines(row + "\n" for row in output_lines)
    print("Done →", outfile)
if __name__ == "__main__":
    main()
bi3mw > 25-11-2025, 07:59 PM
#!/usr/bin/env python3
# coding: utf-8
import os
import queue
import subprocess
import sys
import threading
from collections import defaultdict
from tkinter import Tk, Label, Button, Text, Scrollbar, Entry, X, BOTTOM, END, BOTH

from metaphone import doublemetaphone
# One symbol per decoded word is appended to the hit/miss line in the GUI.
HIT_SYMBOL = "+"  # the word received a Latin match
MISS_SYMBOL = "-"  # the word was decoded as "<no match>"
# ---------------------------------------------------------
def levenshtein(a, b):
    """Compute the Levenshtein distance between *a* and *b*.

    Uses the classic two-row dynamic programme: ``prev_row[j]`` holds the
    distance between the processed prefix of *a* and the first ``j``
    characters of *b*.
    """
    if not a:
        return len(b)
    if not b:
        return len(a)

    prev_row = list(range(len(b) + 1))
    for i, ch_a in enumerate(a, start=1):
        row = [i]
        for j, ch_b in enumerate(b, start=1):
            diagonal = prev_row[j - 1]
            if ch_a != ch_b:
                # substitution / deletion / insertion, whichever is cheapest
                diagonal = 1 + min(diagonal, prev_row[j], row[j - 1])
            row.append(diagonal)
        prev_row = row
    return prev_row[-1]
def load_wordlist(path="wordlist.txt"):
    """Return the non-empty lines of a UTF-8 file, stripped and lower-cased."""
    with open(path, "r", encoding="utf8") as source:
        entries = [raw.strip().lower() for raw in source]
    # Blank lines become empty strings after stripping; leave them out.
    return [entry for entry in entries if entry]
def normalize_voynich_word(w):
    """Drop every non-alphabetic character of *w* and lower-case the rest."""
    kept = []
    for char in w:
        if char.isalpha():
            kept.append(char.lower())
    return "".join(kept)
def build_metaphone_index(words):
    """Map every non-empty DoubleMetaphone code to the words that yield it.

    A word appears under its primary code and, if that is non-empty and
    different, under its secondary code too. Returns a ``defaultdict(list)``.
    """
    index = defaultdict(list)
    for word in words:
        primary, secondary = doublemetaphone(word)
        if secondary == primary:
            # Identical codes must not file the word twice.
            secondary = ""
        for code in (primary, secondary):
            if code:
                index[code].append(word)
    return index
def hybrid_match(v_word, index, MAX_RATIO, top_n=1):
    """Find the Latin word(s) that best match a normalized Voynich word.

    Candidates come from the DoubleMetaphone codes of *v_word*; if fewer
    than five are found, words whose code starts with the first two
    characters of the primary code are added. Candidates are scored by the
    Levenshtein distance between *v_word* and the candidate truncated to
    ``len(v_word)`` characters.

    Args:
        v_word: Normalized (lower-case, letters only) Voynich word.
        index: Mapping of metaphone code -> list of words, as produced by
            build_metaphone_index().
        MAX_RATIO: Maximum accepted distance as a fraction of the word
            length; the effective limit is
            ``max(1, int(len(v_word) * MAX_RATIO))``, so one edit is always
            tolerated.
        top_n: Maximum number of equally good candidates to return.

    Returns:
        Up to *top_n* distinct words sharing the lowest distance, in
        candidate order, or ``["<no match>"]`` when *v_word* is empty, no
        candidate exists, or the lowest distance exceeds the limit.
    """
    if not v_word:
        return ["<no match>"]

    m1, m2 = doublemetaphone(v_word)
    candidates = []
    if m1 in index:
        candidates.extend(index[m1])
    if m2 in index:
        candidates.extend(index[m2])

    # Too few exact matches: widen the search to codes sharing a prefix.
    if len(candidates) < 5 and m1:
        prefix = m1[:2]
        for key in index:
            if key.startswith(prefix):
                candidates.extend(index[key])

    if not candidates:
        return ["<no match>"]

    # The prefix scan revisits the exact-match keys, so the same word can be
    # collected several times. Drop repeats (keeping first-seen order) so
    # that top_n > 1 never returns the same word twice.
    candidates = list(dict.fromkeys(candidates))

    word_len = len(v_word)
    scored = [(levenshtein(v_word, cand[:word_len]), cand) for cand in candidates]
    best_score = min(score for score, _ in scored)

    max_allowed = max(1, int(word_len * MAX_RATIO))
    if best_score > max_allowed:
        return ["<no match>"]

    best = [cand for score, cand in scored if score == best_score]
    return best[:top_n]
# ---------------------------------------------------------
class DecoderGUI:
    """Tkinter window that decodes a Voynich transliteration into Latin words.

    Decoding runs in a background thread so the window stays responsive.
    The worker never touches a widget: it puts ``(callback, args)`` pairs on
    a queue that the Tk main loop drains periodically, so every widget
    update happens on the thread that runs ``mainloop()``.
    """

    DEFAULT_RATIO = 0.35
    POLL_INTERVAL_MS = 50

    def __init__(self, master, infile, outfile):
        """Build the widgets.

        Args:
            master: The Tk root (or toplevel) window hosting the GUI.
            infile: Path of the text file to decode.
            outfile: Path the decoded text is written to.
        """
        self.master = master
        master.title("Voynich to Latin")
        master.geometry("1200x500")

        # Threshold input
        self.threshold_label = Label(master, text="Threshold value (0.0 – 1.0):", font=("Courier", 12))
        self.threshold_label.pack(anchor="w", padx=5, pady=2)
        self.threshold_entry = Entry(master, width=10, font=("Courier", 12))
        self.threshold_entry.insert(0, "0.35")
        self.threshold_entry.pack(anchor="w", padx=5)

        # Info lines
        self.info_label = Label(master, text="Initializing...", font=("Courier", 12), justify="left")
        self.info_label.pack(anchor="w", padx=5, pady=2)

        # Hit/miss display
        self.hit_text = Text(master, width=150, height=4, font=("Courier", 20), wrap="none")
        self.hit_text.pack(fill=BOTH, expand=True, padx=5)
        self.hit_text.tag_config("hit", foreground="green")
        self.hit_text.tag_config("miss", foreground="red")

        # Horizontal scrollbar for the hit/miss display
        self.scrollbar = Scrollbar(master, orient="horizontal", command=self.hit_text.xview)
        self.scrollbar.pack(side=BOTTOM, fill=X)
        self.hit_text.config(xscrollcommand=self.scrollbar.set)

        # Progress
        self.progress_label = Label(master, text="", font=("Courier", 12))
        self.progress_label.pack(anchor="w", padx=5, pady=2)

        # Statistics
        self.stats_label = Label(master, text="", font=("Courier", 12), justify="left")
        self.stats_label.pack(anchor="w", padx=5, pady=2)

        # Buttons
        self.start_button = Button(master, text="Start Decoding", command=self.start_decoding)
        self.start_button.pack(side="left", padx=5, pady=5)
        self.open_button = Button(master, text="Open Output File", command=self.open_output, state="disabled")
        self.open_button.pack(side="left", padx=5, pady=5)
        self.close_button = Button(master, text="Close", command=self.close_window)
        self.close_button.pack(side="left", padx=5, pady=5)

        self.infile = infile
        self.outfile = outfile
        self.stop_flag = False
        self.MAX_RATIO = self.DEFAULT_RATIO

        # Widget updates requested by the worker thread, applied by the
        # main loop in _drain_ui_queue().
        self._ui_queue = queue.Queue()
        # Closing via the window manager must stop the worker just like the
        # Close button does.
        master.protocol("WM_DELETE_WINDOW", self.close_window)
        master.after(self.POLL_INTERVAL_MS, self._drain_ui_queue)

    # -----------------------------------------------------
    def _post(self, callback, *args):
        """Ask the main loop to run ``callback(*args)`` (safe from any thread)."""
        self._ui_queue.put((callback, args))

    def _drain_ui_queue(self):
        """Run all queued widget updates, then re-arm the poll timer."""
        try:
            while True:
                callback, args = self._ui_queue.get_nowait()
                callback(*args)
        except queue.Empty:
            pass
        # After close_window() the widgets are gone; stop polling.
        if not self.stop_flag:
            self.master.after(self.POLL_INTERVAL_MS, self._drain_ui_queue)

    @staticmethod
    def _set_label(label, text):
        """Replace the text of *label*."""
        label.config(text=text)

    @staticmethod
    def _format_stats(total_words, total_match, total_no_match, outfile, max_ratio):
        """Build the end-of-run summary; percentages are 0.00 for empty input."""

        def percent(count):
            # Guard against an input file that contains no words at all.
            return count / total_words * 100 if total_words else 0.0

        return (
            f"Decoding finished.\n"
            f"Total words: {total_words}\n"
            f"Matches: {total_match} ({percent(total_match):.2f}%)\n"
            f"No matches: {total_no_match} ({percent(total_no_match):.2f}%)\n"
            f"Output file: {outfile}\n"
            f"Threshold value used: {max_ratio}"
        )

    # -----------------------------------------------------
    def start_decoding(self):
        """Reset the display, read the threshold and start the worker thread."""
        # Reset all output widgets
        self.hit_text.delete("1.0", END)
        self.progress_label.config(text="")
        self.stats_label.config(text="")
        self.open_button.config(state="disabled")

        # Read the threshold; fall back to the default when the entry is
        # not a number in [0, 1].
        try:
            value = float(self.threshold_entry.get().strip())
            if not (0 <= value <= 1):
                raise ValueError
            self.MAX_RATIO = value
        except ValueError:
            self.MAX_RATIO = self.DEFAULT_RATIO
            self.threshold_entry.delete(0, END)
            self.threshold_entry.insert(0, "0.35")

        # Lock the inputs while decoding is in progress
        self.threshold_entry.config(state="disabled")
        self.start_button.config(state="disabled")
        threading.Thread(target=self.decode).start()

    # -----------------------------------------------------
    def open_output(self):
        """Open the output file with the platform's default application."""
        if sys.platform.startswith("win"):
            os.startfile(self.outfile)
        elif sys.platform == "darwin":
            subprocess.Popen(["open", self.outfile])
        else:
            subprocess.Popen(["xdg-open", self.outfile])

    # -----------------------------------------------------
    def close_window(self):
        """Tell the worker to stop after its current line and close the window."""
        self.stop_flag = True
        self.master.destroy()

    # -----------------------------------------------------
    def insert_hit_line(self, hit_line):
        """Append one line of hit/miss symbols, coloured per symbol."""
        for c in hit_line:
            if c == HIT_SYMBOL:
                self.hit_text.insert(END, c, "hit")
            else:
                self.hit_text.insert(END, c, "miss")
        self.hit_text.insert(END, "\n")
        self.hit_text.see(END)

    # -----------------------------------------------------
    def decode(self):
        """Worker-thread entry point: decode the input and report the result.

        File errors are shown in the statistics label instead of silently
        killing the thread and leaving the buttons disabled.
        """
        try:
            stats_text = self._run_decoding()
        except (OSError, UnicodeError) as exc:
            self._post(self._finish, f"Decoding failed: {exc}", False)
        else:
            self._post(self._finish, stats_text, True)

    def _run_decoding(self):
        """Decode ``self.infile`` into ``self.outfile``; return the summary text."""
        latin_words = load_wordlist("wordlist.txt")
        index = build_metaphone_index(latin_words)
        info_text = (
            f"Loading Latin wordlist ...\n"
            f"Total words: {len(latin_words)}\n"
            f"Metaphone keys: {len(index)}\n"
            f"Decoding ..."
        )
        self._post(self._set_label, self.info_label, info_text)

        with open(self.infile, "r", encoding="utf8") as f:
            lines = f.readlines()

        output_lines = []
        total_match = total_no_match = 0
        for i, line in enumerate(lines):
            if self.stop_flag:
                break
            line = line.strip()
            if not line:
                output_lines.append("")
                continue

            decoded_line = []
            marks = []
            for w in line.split():
                normalized = normalize_voynich_word(w)
                decoded = hybrid_match(normalized, index, self.MAX_RATIO)[0]
                decoded_line.append(decoded)
                if decoded == "<no match>":
                    marks.append(MISS_SYMBOL)
                    total_no_match += 1
                else:
                    marks.append(HIT_SYMBOL)
                    total_match += 1

            self._post(self.insert_hit_line, "".join(marks))
            output_lines.append(" ".join(decoded_line))
            self._post(self._set_label, self.progress_label, f"{i+1}/{len(lines)} lines decoded")

        # Write the output file (partial if the run was stopped early)
        with open(self.outfile, "w", encoding="utf8") as f:
            for row in output_lines:
                f.write(row + "\n")

        return self._format_stats(
            total_match + total_no_match,
            total_match,
            total_no_match,
            self.outfile,
            self.MAX_RATIO,
        )

    def _finish(self, stats_text, output_ready):
        """Show the final message and re-enable the controls (main thread)."""
        self.stats_label.config(text=stats_text)
        # Re-enable the entry field and the start button
        self.start_button.config(state="normal")
        self.threshold_entry.config(state="normal")
        # Only offer to open the output file when it was actually written
        if output_ready:
            self.open_button.config(state="normal")
# ---------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python phonetic_levenshtein_max_ratio_tk.py voyn2latin.txt mapped.txt")
        sys.exit(1)
    infile = sys.argv[1]
    outfile = sys.argv[2]
    root = Tk()
    gui = DecoderGUI(root, infile, outfile)
    root.mainloop()
Doireannjane > 25-11-2025, 09:11 PM
(24-11-2025, 11:41 AM)ReneZ Wrote: You are not allowed to view links. Register or Login to view.Also interesting is the case when the proposed language is one that the solver is not too familiar with, but other people are. This was the case with an Italian proposed solution by Edith Sherwood, mainly for plant names, which seemed reasonable to her, but to a prominent Italian forum member (hint, hint) not at all.
Ruby Novacna > 26-11-2025, 08:10 AM
(23-11-2025, 11:30 AM)bi3mw Wrote: You are not allowed to view links. Register or Login to view.First of all: This is NOT a serious solution for the VMS! Since I have seen many “solutions” with pseudo-Latin lately, I just want to demonstrate how easy it is to replicate this with any word list. Anyone can reproduce the procedure described below...
bi3mw > 26-11-2025, 09:08 AM
(26-11-2025, 08:10 AM)Ruby Novacna Wrote: You are not allowed to view links. Register or Login to view.Unfortunately, I don't have that programming skill,
You don't really need any programming skills, just follow the instructions in the opening post.
bi3mw > 29-11-2025, 04:57 PM

Voyn2Greek.zip (Size: 918.43 KB / Downloads: 5)
phonetic_levenshtein.txt (Size: 9.68 KB / Downloads: 1)