/* * Vigenere.java * by Eric Farmer */ /** * This class contains methods for encrypting and decrypting using the Vigenere * cipher. All computation is done with arrays of character codes in the range * 0 to 25 ('a' to 'z'); convenience methods convert between this and string * format. * * @author Eric Farmer * @version 2002-04-30 */ public class Vigenere { /* Contains only static methods. */ private Vigenere() {} /** * Converts the alphabetic characters in the given string to an array of * character codes in the range 0 to 25. All non-alphabetic characters are * ignored. * * @param s the string to convert * * @return the array of character codes */ public static char[] stringToLetters(String s) { StringBuffer buffer = new StringBuffer(s.toLowerCase()); int length = 0; for (int i = 0; i < buffer.length(); i++) { char ch = buffer.charAt(i); if (Character.isLetter(ch)) { buffer.setCharAt(length++, ch); } } buffer.setLength(length); char[] c = buffer.toString().toCharArray(); for (int i = 0; i < c.length; i++) { c[i] -= 'a'; } return c; } /** * Converts the given array of character codes, in the range 0 to 25, to a * string of corresponding lowercase alphabetic characters. * * @param c the array of character codes * * @return the string of alphabetic characters */ public static String lettersToString(char[] c) { for (int i = 0; i < c.length; i++) { c[i] += 'a'; } String s = String.copyValueOf(c); for (int i = 0; i < c.length; i++) { c[i] -= 'a'; } return s; } /** * Encrypts the given plaintext with the given key, both consisting of * character codes in the range 0 to 25. * * @param plainText the text to be encrypted * @param key the encryption key * * @return the encrypted text */ public static char[] encrypt(char[] plainText, char[] key) { char[] cipherText = new char[plainText.length]; for (int i = 0; i < plainText.length; i++) { cipherText[i] = (char)((plainText[i] + key[i%key.length])%26); } return cipherText; } /** * Decrypts the given ciphertext with the given key, both consisting of * character codes in the range 0 to 25. * * @param cipherText the text to be decrypted * @param key the decryption key * * @return the decrypted text */ public static char[] decrypt(char[] cipherText, char[] key) { char[] plainText = new char[cipherText.length]; for (int i = 0; i < cipherText.length; i++) { plainText[i] = (char)((cipherText[i] + 26 - key[i%key.length])%26); } return plainText; } /** * Estimates the keyword length for the given ciphertext, based on the * index of coincidence test. The keyword length which maximizes the * minimum index of coincidence over all parts of the ciphertext partition * is returned. * * @param cipherText the ciphertext * * @return the estimated keyword length */ public static int guessKeyLength(char[] cipherText) { int bestKeyLength = 2; double maxMinIndex = Double.NEGATIVE_INFINITY; int maxKeyLength = (int)Math.sqrt(cipherText.length); /* Try "all" possible keyword lengths. */ for (int keyLength = 2; keyLength <= maxKeyLength; keyLength++) { double minIndex = Double.POSITIVE_INFINITY; /* Compute each index of coincidence, and find the minimum. */ for (int offset = 0; offset < keyLength; offset++) { double index = indexOfCoincidence(cipherText, offset, keyLength); if (index < minIndex) { minIndex = index; } } /* Select the keyword length that maximizes the minimum. */ if (minIndex > maxMinIndex) { maxMinIndex = minIndex; bestKeyLength = keyLength; } } return bestKeyLength; } /** * Estimates the keyword for the given ciphertext, given the keyword * length, based on a combination of monoalphabetic frequency analysis and * mutual indices of coincidence. * * @param cipherText the ciphertext * @param keyLength the keyword length * * @return the estimated keyword */ public static char[] guessKey(char[] cipherText, int keyLength) { char[] key = new char[keyLength]; boolean[] found = new boolean[keyLength]; int lastFound = 0; /* * Build the keyword one letter at a time. Based on simple frequency * analysis in each (monoalphabetic) block, estimate each keyletter by * matching the most frequent ciphertext letter with plaintext 'e'. * Select the best of these matches by "confidence," measured by the * discrepancy between the most frequent and second most frequent * letter. */ int maxDiffFreq = -1; for (int offset = 0; offset < keyLength; offset++) { int[] bins = frequencies(cipherText, offset, keyLength); int bestE = 0; /* Estimate keyletter based on plaintext 'e'. */ for (int c = 1; c < 26; c++) { if (bins[c] > bins[bestE]) { bestE = c; } } int maxFrequency = bins[bestE]; /* Select the keyletter we are most confident in. */ sort(bins); int diff = maxFrequency - bins[24]; if (diff > maxDiffFreq) { maxDiffFreq = diff; lastFound = offset; key[offset] = (char)((bestE + 22)%26); } } found[lastFound] = true; /* * Continue the incremental building of the keyword using mutual * indices of coincidence. Pair the most recently found keyletter with * each other remaining keyletter, "solving" using the mutual index of * coincidence test. Select the best of these solutions again by * "confidence," or discrepancy between the two highest indices. */ for (int i = 1; i < keyLength; i++) { int bestOffset = 0; double maxDiffIndex = Double.NEGATIVE_INFINITY; for (int offset = 0; offset < keyLength; offset++) { /* If this keyletter isn't already known, estimate it. */ if (!found[offset]) { double[] indices = mutualIndicesOfCoincidence(cipherText, offset, lastFound, keyLength); int bestShift = 0; /* Estimate keyletter based on mutual index. */ for (int shift = 1; shift < 26; shift++) { if (indices[shift] > indices[bestShift]) { bestShift = shift; } } double maxIndex = indices[bestShift]; /* Select the keyletter we are most confident in. */ sort(indices); double diff = maxIndex - indices[24]; if (diff > maxDiffIndex) { maxDiffIndex = diff; bestOffset = offset; key[offset] = (char)((key[lastFound] + bestShift)%26); } } } lastFound = bestOffset; found[lastFound] = true; } /* * At this point, we have a very good guess of the keyword. The weak * point seems to be not the relationships between keyletters (derived * by the mutual indices of coincidence), but the choice from the 26 * cyclic shifts of the keyword based on frequency analysis. We refine * our guess by selecting the shift which maximizes the average * frequency of guessed plaintext 'e' over all parts of the ciphertext * partition. */ int[][] bins = new int[keyLength][]; for (int offset = 0; offset < keyLength; offset++) { bins[offset] = frequencies(cipherText, offset, keyLength); } int bestShift = 0, maxFreq = -1; for (int shift = 0; shift < 26; shift++) { int totalFreq = 0; for (int offset = 0; offset < keyLength; offset++) { totalFreq += bins[offset][(4 + key[offset] + shift)%26]; } if (totalFreq > maxFreq) { maxFreq = totalFreq; bestShift = shift; } } for (int offset = 0; offset < keyLength; offset++) { key[offset] = (char)((key[offset] + bestShift)%26); } return key; } /** * Returns the frequencies of the letters in the given subtext of the given * ciphertext. Element c (in the range 0 to 25) in the array that is * returned indicates the number of occurrences of letter c in the subtext. * The subtext is specified by the keyword length and an offset; e.g., the * entire ciphertext is specified by offset 0 and key length 1. * * @param cipherText the entire ciphertext * @param offset the offset from the start of the ciphertext * @param keyLength the keyword length * * @return the frequencies of the 26 ciphertext letters */ public static int[] frequencies(char[] cipherText, int offset, int keyLength) { int[] bins = new int[26]; for (int i = offset; i < cipherText.length; i+= keyLength) { bins[cipherText[i]]++; } return bins; } /** * Returns the index of coincidence of the given subtext of the given * ciphertext. * * @see #frequencies * * @param cipherText the entire ciphertext * @param offset the offset from the start of the ciphertext * @param keyLength the keyword length * * @return the index of coincidence */ public static double indexOfCoincidence(char[] cipherText, int offset, int keyLength) { double index = 0; int[] bins = frequencies(cipherText, offset, keyLength); int length = 0; for (int c = 0; c < 26; c++) { index += bins[c]*(bins[c] - 1); length += bins[c]; } return index/(length*(length - 1)); } /** * Returns the mutual indices of coincidence for the given subtexts of the * given ciphertext, one for each of 26 possible shifts. Element c in the * array that is returned indicates the mutual index of coincidence for the * first subtext and the second subtext shifted by c. * * @see #frequencies * * @param cipherText the entire ciphertext * @param offset1 the first offset from the start of the ciphertext * @param offset2 the second offset from the start of the ciphertext * @param keyLength the keyword length * * @return the mutual indices of coincidence */ public static double[] mutualIndicesOfCoincidence(char[] cipherText, int offset1, int offset2, int keyLength) { double[] indices = new double[26]; /* Compute frequencies in each (monoalphabetic) block only once. */ int[] bins1 = frequencies(cipherText, offset1, keyLength); int[] bins2 = frequencies(cipherText, offset2, keyLength); int length1 = 0, length2 = 0; for (int c = 0; c < 26; c++) { length1 += bins1[c]; length2 += bins2[c]; } /* Compute index for each possible shift. */ for (int shift = 0; shift < 26; shift++) { for (int c = 0; c < 26; c++) { indices[shift] += bins1[(c + shift)%26]*bins2[c]; } indices[shift] /= length1*length2; } return indices; } /* Sorting is not built in to Java 1.1 */ private static void sort(int[] a) { for (int i = 0; i < a.length - 1; i++) { for (int j = i + 1; j < a.length; j++) { if (a[i] > a[j]) { int temp = a[i]; a[i] = a[j]; a[j] = temp; } } } } private static void sort(double[] a) { for (int i = 0; i < a.length - 1; i++) { for (int j = i + 1; j < a.length; j++) { if (a[i] > a[j]) { double temp = a[i]; a[i] = a[j]; a[j] = temp; } } } } }