bi3mw > 13-08-2025, 05:50 PM
(13-08-2025, 11:41 AM)magnesium Wrote: You are not allowed to view links. Register or Login to view.Very interesting, thanks! Out of curiosity, would you mind running the same analysis on just the subsections of ZL3a-n.txt assigned to Voynich/Currier B? The overrepresentation of qok within the Naibbe cipher is meant to mimic Voynich B specifically, so I'm curious to see how well that holds up.
magnesium > 13-08-2025, 06:04 PM
(13-08-2025, 05:50 PM)bi3mw Wrote: You are not allowed to view links. Register or Login to view. Yes, I could do that. However, I would need a prepared ZL3a-n.txt file. Does anyone have one, or can anyone tell me how to extract Currier B from it?
bi3mw > 13-08-2025, 06:31 PM
magnesium > 13-08-2025, 06:38 PM
(13-08-2025, 06:31 PM)bi3mw Wrote: You are not allowed to view links. Register or Login to view.I was actually thinking of an IVTT command on the command line that does everything in one go. Maybe @ReneZ can help here. I know the following command that extracts the entire text:
ivtt -x7 ZL3a-n.txt outputfile.txt
I just need to know the parameter to extract Currier B.
RobGea > 13-08-2025, 06:45 PM
bi3mw > 13-08-2025, 08:16 PM
bi3mw > 13-08-2025, 09:49 PM
(13-08-2025, 06:45 PM)RobGea Wrote: You are not allowed to view links. Register or Login to view. I would be interested to know which "language-independent morphological parser" you used: web-based, standalone software, or something else? Thanks.
#!/usr/bin/env python3
import sys
import re
from collections import Counter
def find_affixes(words, min_prefix_len=2, min_suffix_len=2, top_n=None,
                 max_affix_len=5):
    """Count candidate prefixes and suffixes over a collection of words.

    For every word, each leading and trailing substring whose length lies
    between the respective minimum and ``max_affix_len`` (inclusive) is
    counted, provided it is strictly shorter than the word itself.

    Args:
        words: Iterable of strings to analyse.
        min_prefix_len: Shortest prefix length to count. Values below 1 are
            treated as 1.
        min_suffix_len: Shortest suffix length to count. Values below 1 are
            treated as 1.
        top_n: If given, keep only the ``top_n`` most frequent prefixes and
            suffixes; ``None`` keeps all of them.
        max_affix_len: Longest affix length to count (default 5).

    Returns:
        A ``(prefixes, suffixes)`` tuple; each element is a list of
        ``(affix, count)`` pairs as produced by ``Counter.most_common``.
    """
    prefixes = Counter()
    suffixes = Counter()

    # A zero-length slice is never a real affix: w[:0] is "" and w[-0:] is
    # the entire word, so start from length 1 at the earliest.
    first_prefix_len = max(1, min_prefix_len)
    first_suffix_len = max(1, min_suffix_len)

    for w in words:
        # An affix must leave at least one character of the word behind.
        longest = min(len(w) - 1, max_affix_len)
        for i in range(first_prefix_len, longest + 1):
            prefixes[w[:i]] += 1
        for i in range(first_suffix_len, longest + 1):
            suffixes[w[-i:]] += 1

    if top_n is None:
        return prefixes.most_common(), suffixes.most_common()
    return prefixes.most_common(top_n), suffixes.most_common(top_n)
def _longest_affix(stem, affixes, lengths, at_start):
    """Return the longest known affix at one end of ``stem``, or ``None``.

    ``lengths`` must be sorted in descending order; ``at_start`` selects the
    leading (True) or trailing (False) end of ``stem``.
    """
    for k in lengths:
        # Stripping the affix must leave at least two characters behind.
        if k + 1 >= len(stem):
            continue
        candidate = stem[:k] if at_start else stem[-k:]
        if candidate in affixes:
            return candidate
    return None


def segment_word_multi(word, prefix_set, suffix_set):
    """Split ``word`` into leading prefixes, a stem, and trailing suffixes.

    Prefixes are stripped repeatedly from the front, always choosing the
    longest one that matches, then suffixes are stripped the same way from
    the back. An affix is only removed when more than one character would
    remain afterwards. Empty strings in either set are ignored.

    Args:
        word: The string to segment.
        prefix_set: Collection of known prefix strings.
        suffix_set: Collection of known suffix strings.

    Returns:
        A list ``[prefix, ..., stem, suffix, ...]`` in left-to-right order
        whose concatenation equals ``word``.
    """
    prefixes = frozenset(prefix_set)
    suffixes = frozenset(suffix_set)
    # Computed once: the distinct affix lengths, longest first. At most one
    # substring of a given length can sit at either end of the stem, so a
    # membership test per length replaces scanning the whole sorted set.
    prefix_lengths = sorted({len(p) for p in prefixes if p}, reverse=True)
    suffix_lengths = sorted({len(s) for s in suffixes if s}, reverse=True)

    stem = word

    # Strip as many prefixes as possible.
    leading = []
    while True:
        found = _longest_affix(stem, prefixes, prefix_lengths, True)
        if found is None:
            break
        leading.append(found)
        stem = stem[len(found):]

    # Strip as many suffixes as possible; they are found outermost-first.
    trailing = []
    while True:
        found = _longest_affix(stem, suffixes, suffix_lengths, False)
        if found is None:
            break
        trailing.append(found)
        stem = stem[:-len(found)]
    trailing.reverse()

    return leading + [stem] + trailing
def find_stem(parts, prefix_set, suffix_set):
    """Return the first element of ``parts`` that is not a known affix.

    Args:
        parts: Sequence of word segments.
        prefix_set: Collection of known prefixes.
        suffix_set: Collection of known suffixes.

    Returns:
        The first segment contained in neither collection, or ``""`` when
        every segment is a known affix (or ``parts`` is empty).
    """
    # NOTE(review): a stem that happens to equal a known affix is skipped
    # here -- confirm whether that is intended.
    return next(
        (
            piece
            for piece in parts
            if not (piece in prefix_set or piece in suffix_set)
        ),
        "",
    )
def main():
    """Segment every word of a text file and write the result plus statistics.

    Command line: ``<input_file> <output_file> [top_n|'all']``. The optional
    third argument limits how many of the most frequent prefixes and
    suffixes are used (default 10); ``all`` uses every candidate. Exits with
    status 1 on a wrong argument count or an invalid third argument.

    The output file holds the input text with each alphabetic word replaced
    by its ``/``-joined segments, followed by tab-separated frequency tables
    for prefixes, stems, and suffixes.
    """
    args = sys.argv
    if not 3 <= len(args) <= 4:
        print(f"Usage: {args[0]} <input_file> <output_file> [top_n|'all']")
        sys.exit(1)
    input_file, output_file = args[1], args[2]

    # Default value when no third argument is given.
    top_n = 10
    if len(args) == 4:
        param = args[3].lower()
        if param == "all":
            # Use all affixes.
            top_n = None
        else:
            try:
                top_n = int(param)
            except ValueError:
                # Not a number: force the rejection branch below.
                top_n = 0
            if top_n < 1:
                print("Der dritte Parameter muss eine positive ganze Zahl oder 'all' sein.")
                sys.exit(1)

    with open(input_file, "r", encoding="utf-8") as source:
        text = source.read()

    word_pattern = r"[A-Za-z]+"
    words = list(map(str.lower, re.findall(word_pattern, text)))

    top_prefixes, top_suffixes = find_affixes(words, top_n=top_n)
    prefix_set = {affix for affix, _ in top_prefixes}
    suffix_set = {affix for affix, _ in top_suffixes}
    stem_counter = Counter()

    def replacer(match):
        # Segment one matched word, tallying its stem as a side effect.
        token = match.group(0)
        parts = segment_word_multi(token.lower(), prefix_set, suffix_set)
        stem_counter[find_stem(parts, prefix_set, suffix_set)] += 1
        segmented = "/".join(parts)
        # Restore the leading capital of the original token.
        return segmented.capitalize() if token[0].isupper() else segmented

    output_text = re.sub(word_pattern, replacer, text)

    report = [output_text, "\n\n=== Präfixe ===\n"]
    report.extend(f"{p}\t{c}\n" for p, c in top_prefixes)
    report.append("\n=== Stämme ===\n")
    report.extend(
        f"{s}\t{c}\n" for s, c in stem_counter.most_common() if s != ""
    )
    report.append("\n=== Suffixe ===\n")
    report.extend(f"{s}\t{c}\n" for s, c in top_suffixes)

    with open(output_file, "w", encoding="utf-8") as target:
        target.writelines(report)
# Run the segmenter only when this file is executed as a script.
if __name__ == "__main__":
    main()
#!/usr/bin/env python3
import sys
from collections import Counter
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def extract_prefix_suffix(word):
    """Split a ``/``-segmented word into prefixes, suffixes, and a stem.

    Args:
        word: A token whose segments are separated by ``/``.

    Returns:
        ``(prefixes, suffixes, stem)``. The part at index ``len(parts) // 2``
        is taken as the stem, everything before it as prefixes and
        everything after it as suffixes. Tokens with fewer than three parts
        yield ``([], [], None)``.
    """
    # NOTE(review): taking the middle part as the stem presumably assumes a
    # balanced number of prefixes and suffixes -- verify against the
    # segmenter's output.
    pieces = word.split('/')
    count = len(pieces)
    if count >= 3:
        middle = count // 2
        return pieces[:middle], pieces[middle + 1:], pieces[middle]
    return [], [], None
def main():
    """Plot a heatmap of prefix/suffix co-occurrence in a segmented text.

    Command line: ``output_segmented.txt [N]``. Every whitespace-separated
    token of the file is split with ``extract_prefix_suffix`` and each
    (prefix, suffix) pair within a token is counted. The ``N`` prefixes and
    ``N`` suffixes with the highest total counts (default 20) are shown.

    Exits with status 1 when the filename is missing, ``N`` is not a
    positive integer, or no prefix-suffix pair is found.
    """
    if len(sys.argv) < 2:
        print("Usage: python heatmap_prefix_suffix.py output_segmented.txt [N]")
        sys.exit(1)
    filename = sys.argv[1]

    top_n = 20  # default number of top prefixes and suffixes
    if len(sys.argv) >= 3:
        try:
            top_n = int(sys.argv[2])
        except ValueError:
            print("Parameter N must be an integer.")
            sys.exit(1)
        # A non-positive N would select no rows or columns and leave
        # nothing to plot, so reject it up front.
        if top_n < 1:
            print("Parameter N must be a positive integer.")
            sys.exit(1)

    prefix_suffix_counts = Counter()
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            for w in line.split():
                prefix, suffix, _stem = extract_prefix_suffix(w)
                prefix_suffix_counts.update(
                    (pre, suf) for pre in prefix for suf in suffix
                )

    if not prefix_suffix_counts:
        print("No prefix-suffix combinations found.")
        sys.exit(1)

    df = pd.DataFrame(
        [
            {"prefix": pre, "suffix": suf, "count": count}
            for (pre, suf), count in prefix_suffix_counts.items()
        ]
    )

    # Select top N prefixes and suffixes by total count
    top_prefixes = df.groupby('prefix')['count'].sum().nlargest(top_n).index
    top_suffixes = df.groupby('suffix')['count'].sum().nlargest(top_n).index

    # Pairs that never occur together become 0 rather than NaN.
    pivot = df.pivot(index='prefix', columns='suffix', values='count').fillna(0)
    pivot_top = pivot.loc[top_prefixes, top_suffixes]

    plt.figure(figsize=(18, 12))
    sns.heatmap(pivot_top, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={"shrink": 0.5})
    plt.title(f"Top {top_n} Prefix-Suffix Combinations")
    plt.xlabel("Suffix")
    plt.ylabel("Prefix")
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(rotation=0, fontsize=12)  # keep prefix labels horizontal
    plt.subplots_adjust(bottom=0.2)  # make space for xlabel
    plt.show()
# Run the heatmap tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
quimqu > 13-08-2025, 10:40 PM
davidma > 14-08-2025, 12:25 AM
(13-08-2025, 10:40 PM)quimqu Wrote: You are not allowed to view links. Register or Login to view. This is very interesting. I find the cipher a bit complex (a sort of dice-and-card game), and I don't find it very easy to encipher, or even very easy to decipher, but that is the trick! If it is enciphered, only the authorized people should be able to read it...
ReneZ > 14-08-2025, 02:24 AM