Merge pull request #3541 from stalefishies/compression-contracts

CODINGCONTRACT: Three new compression contracts
2024-12-20 05:05:47 +01:00 · 2022-04-25 11:26:22 -04:00 · 2022-04-25 11:26:22 -04:00 · 91223072dd
commit 91223072dd
parent 063d4b6e8d 174d17a5e2
2 changed files with 345 additions and 0 deletions
--- a/src/data/codingcontracttypes.ts
+++ b/src/data/codingcontracttypes.ts
@ -1,6 +1,7 @@
 import { getRandomInt } from "../utils/helpers/getRandomInt";
 import { MinHeap } from "../utils/Heap";

+import { comprGenChar, comprLZGenerate, comprLZEncode, comprLZDecode } from "../utils/CompressionContract";
 import { HammingEncode, HammingDecode } from "../utils/HammingCodeTools";
 /* tslint:disable:completed-docs no-magic-numbers arrow-return-shorthand */

@ -1456,4 +1457,155 @@ export const codingContractTypesMetadata: ICodingContractTypeMetadata[] = [
      else return false;
    },
  },
+  {
+    name: "Compression I: RLE Compression",
+    difficulty: 2,
+    numTries: 10,
+    desc: (plaintext: string): string => {
+      return [
+        "Run-length encoding (RLE) is a data compression technique which encodes data as a series of runs of",
+        "a repeated single character. Runs are encoded as a length, followed by the character itself. Lengths",
+        "are encoded as a single ASCII digit; runs of 10 characters or more are encoded by splitting them",
+        "into multiple runs.\n\n",
+        "You are given the following input string:\n",
+        `&nbsp; &nbsp; ${plaintext}\n`,
+        "Encode it using run-length encoding with the minimum possible output length.\n\n",
+        "Examples:\n",
+        "&nbsp; &nbsp; aaaaabccc &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;-> &nbsp;5a1b3c\n",
+        "&nbsp; &nbsp; aAaAaA &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; -> &nbsp;1a1A1a1A1a1A\n",
+        "&nbsp; &nbsp; 111112333 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;-> &nbsp;511233\n",
+        "&nbsp; &nbsp; zzzzzzzzzzzzzzzzzzz &nbsp;-> &nbsp;9z9z1z &nbsp;(or 9z8z2z, etc.)\n",
+      ].join(" ");
+    },
+    gen: (): string => {
+      const length = 50 + Math.floor(25 * (Math.random() + Math.random()));
+      let plain = "";
+
+      while (plain.length < length) {
+        const r = Math.random();
+
+        let n = 1;
+        if (r < 0.3) {
+          n = 1;
+        } else if (r < 0.6) {
+          n = 2;
+        } else if (r < 0.9) {
+          n = Math.floor(10 * Math.random());
+        } else {
+          n = 10 + Math.floor(5 * Math.random());
+        }
+
+        const c = comprGenChar();
+        plain += c.repeat(n);
+      }
+
+      return plain.substring(0, length);
+    },
+    solver: (plain: string, ans: string): boolean => {
+      if (ans.length % 2 !== 0) {
+        return false;
+      }
+
+      let ans_plain = "";
+      for (let i = 0; i + 1 < ans.length; i += 2) {
+        const length = ans.charCodeAt(i) - 0x30;
+        if (length < 0 || length > 9) {
+          return false;
+        }
+
+        ans_plain += ans[i + 1].repeat(length);
+      }
+      if (ans_plain !== plain) {
+        return false;
+      }
+
+      let length = 0;
+      for (let i = 0; i < plain.length; ) {
+        let run_length = 1;
+        while (i + run_length < plain.length && plain[i + run_length] === plain[i]) {
+          ++run_length;
+        }
+        i += run_length;
+
+        while (run_length > 0) {
+          run_length -= 9;
+          length += 2;
+        }
+      }
+      return ans.length === length;
+    },
+  },
+  {
+    name: "Compression II: LZ Decompression",
+    difficulty: 4,
+    numTries: 10,
+    desc: (compressed: string): string => {
+      return [
+        "Lempel-Ziv (LZ) compression is a data compression technique which encodes data using references to",
+        "earlier parts of the data. In this variant of LZ, data is encoded in two types of chunk. Each chunk",
+        "begins with a length L, encoded as a single ASCII digit from 1 - 9, followed by the chunk data,",
+        "which is either:\n\n",
+        "1. Exactly L characters, which are to be copied directly into the uncompressed data.\n",
+        "2. A reference to an earlier part of the uncompressed data. To do this, the length is followed",
+        "by a second ASCII digit X: each of the L output characters is a copy of the character X",
+        "places before it in the uncompressed data.\n\n",
+        "For both chunk types, a length of 0 instead means the chunk ends immediately, and the next character",
+        "is the start of a new chunk. The two chunk types alternate, starting with type 1, and the final",
+        "chunk may be of either type.\n\n",
+        "You are given the following LZ-encoded string:\n",
+        `&nbsp; &nbsp; ${compressed}\n`,
+        "Decode it and output the original string.\n\n",
+        "Example: decoding '5aaabc340533bca' chunk-by-chunk\n",
+        "&nbsp; &nbsp; 5aaabc &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; -> &nbsp;aaabc\n",
+        "&nbsp; &nbsp; 5aaabc34 &nbsp; &nbsp; &nbsp; &nbsp; -> &nbsp;aaabcaab\n",
+        "&nbsp; &nbsp; 5aaabc340 &nbsp; &nbsp; &nbsp; &nbsp;-> &nbsp;aaabcaab\n",
+        "&nbsp; &nbsp; 5aaabc34053 &nbsp; &nbsp; &nbsp;-> &nbsp;aaabcaabaabaa\n",
+        "&nbsp; &nbsp; 5aaabc340533bca &nbsp;-> &nbsp;aaabcaabaabaabca",
+      ].join(" ");
+    },
+    gen: (): string => {
+      return comprLZEncode(comprLZGenerate());
+    },
+    solver: (compr: string, ans: string): boolean => {
+      return ans === comprLZDecode(compr);
+    },
+  },
+  {
+    name: "Compression III: LZ Compression",
+    difficulty: 10,
+    numTries: 10,
+    desc: (plaintext: string): string => {
+      return [
+        "Lempel-Ziv (LZ) compression is a data compression technique which encodes data using references to",
+        "earlier parts of the data. In this variant of LZ, data is encoded in two types of chunk. Each chunk",
+        "begins with a length L, encoded as a single ASCII digit from 1 - 9, followed by the chunk data,",
+        "which is either:\n\n",
+        "1. Exactly L characters, which are to be copied directly into the uncompressed data.\n",
+        "2. A reference to an earlier part of the uncompressed data. To do this, the length is followed",
+        "by a second ASCII digit X: each of the L output characters is a copy of the character X",
+        "places before it in the uncompressed data.\n\n",
+        "For both chunk types, a length of 0 instead means the chunk ends immediately, and the next character",
+        "is the start of a new chunk. The two chunk types alternate, starting with type 1, and the final",
+        "chunk may be of either type.\n\n",
+        "You are given the following input string:\n",
+        `&nbsp; &nbsp; ${plaintext}\n`,
+        "Encode it using Lempel-Ziv encoding with the minimum possible output length.\n\n",
+        "Examples (some have other possible encodings of minimal length):\n",
+        "&nbsp; &nbsp; abracadabra &nbsp; &nbsp;-> &nbsp;7abracad47\n",
+        "&nbsp; &nbsp; mississippi &nbsp; &nbsp;-> &nbsp;4miss433ppi\n",
+        "&nbsp; &nbsp; aAAaAAaAaAA &nbsp; &nbsp;-> &nbsp;3aAA53035\n",
+        "&nbsp; &nbsp; 2718281828 &nbsp; &nbsp; -> &nbsp;627182844\n",
+        "&nbsp; &nbsp; abcdefghijk &nbsp; &nbsp;-> &nbsp;9abcdefghi02jk\n",
+        "&nbsp; &nbsp; aaaaaaaaaaa &nbsp; &nbsp;-> &nbsp;1a911a\n",
+        "&nbsp; &nbsp; aaaaaaaaaaaa &nbsp; -> &nbsp;1a912aa\n",
+        "&nbsp; &nbsp; aaaaaaaaaaaaa &nbsp;-> &nbsp;1a91031",
+      ].join(" ");
+    },
+    gen: (): string => {
+      return comprLZGenerate();
+    },
+    solver: (plain: string, ans: string): boolean => {
+      return comprLZDecode(ans) === plain && ans.length === comprLZEncode(plain).length;
+    },
+  },
 ];
--- a/src/utils/CompressionContracts.ts
+++ b/src/utils/CompressionContracts.ts
@ -0,0 +1,193 @@
+// choose random character for generating plaintexts to compress
+export function comprGenChar(): string {
+  const r = Math.random();
+  if (r < 0.4) {
+    return "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[Math.floor(26 * Math.random())];
+  } else if (r < 0.8) {
+    return "abcdefghijklmnopqrstuvwxyz"[Math.floor(26 * Math.random())];
+  } else {
+    return "01234567689"[Math.floor(10 * Math.random())];
+  }
+}
+
+// generate plaintext which is amenable to LZ encoding
+export function comprLZGenerate(): string {
+  const length = 50 + Math.floor(25 * (Math.random() + Math.random()));
+  let plain = "";
+
+  while (plain.length < length) {
+    if (Math.random() < 0.8) {
+      plain += comprGenChar();
+    } else {
+      const length = 1 + Math.floor(9 * Math.random());
+      const offset = 1 + Math.floor(9 * Math.random());
+      if (offset > plain.length) {
+        continue;
+      }
+
+      for (let i = 0; i < length; ++i) {
+        plain += plain[plain.length - offset];
+      }
+    }
+  }
+
+  return plain.substring(0, length);
+}
+
+// compress plaintest string
+export function comprLZEncode(plain: string): string {
+  // for state[i][j]:
+  //      if i is 0, we're adding a literal of length j
+  //      else, we're adding a backreference of offset i and length j
+  let cur_state: (string | null)[][] = Array.from(Array(10), () => Array(10).fill(null));
+  let new_state: (string | null)[][] = Array.from(Array(10), () => Array(10));
+
+  function set(state: (string | null)[][], i: number, j: number, str: string): void {
+    const current = state[i][j];
+    if (current == null || str.length < current.length) {
+      state[i][j] = str;
+    } else if (str.length === current.length && Math.random() < 0.5) {
+      // if two strings are the same length, pick randomly so that
+      // we generate more possible inputs to Compression II
+      state[i][j] = str;
+    }
+  }
+
+  // initial state is a literal of length 1
+  cur_state[0][1] = "";
+
+  for (let i = 1; i < plain.length; ++i) {
+    for (const row of new_state) {
+      row.fill(null);
+    }
+    const c = plain[i];
+
+    // handle literals
+    for (let length = 1; length <= 9; ++length) {
+      const string = cur_state[0][length];
+      if (string == null) {
+        continue;
+      }
+
+      if (length < 9) {
+        // extend current literal
+        set(new_state, 0, length + 1, string);
+      } else {
+        // start new literal
+        set(new_state, 0, 1, string + "9" + plain.substring(i - 9, i) + "0");
+      }
+
+      for (let offset = 1; offset <= Math.min(9, i); ++offset) {
+        if (plain[i - offset] === c) {
+          // start new backreference
+          set(new_state, offset, 1, string + length + plain.substring(i - length, i));
+        }
+      }
+    }
+
+    // handle backreferences
+    for (let offset = 1; offset <= 9; ++offset) {
+      for (let length = 1; length <= 9; ++length) {
+        const string = cur_state[offset][length];
+        if (string == null) {
+          continue;
+        }
+
+        if (plain[i - offset] === c) {
+          if (length < 9) {
+            // extend current backreference
+            set(new_state, offset, length + 1, string);
+          } else {
+            // start new backreference
+            set(new_state, offset, 1, string + "9" + offset + "0");
+          }
+        }
+
+        // start new literal
+        set(new_state, 0, 1, string + length + offset);
+      }
+    }
+
+    const tmp_state = new_state;
+    new_state = cur_state;
+    cur_state = tmp_state;
+  }
+
+  let result = null;
+
+  for (let len = 1; len <= 9; ++len) {
+    let string = cur_state[0][len];
+    if (string == null) {
+      continue;
+    }
+
+    string += len + plain.substring(plain.length - len, plain.length);
+    if (result == null || string.length < result.length) {
+      result = string;
+    } else if (string.length == result.length && Math.random() < 0.5) {
+      result = string;
+    }
+  }
+
+  for (let offset = 1; offset <= 9; ++offset) {
+    for (let len = 1; len <= 9; ++len) {
+      let string = cur_state[offset][len];
+      if (string == null) {
+        continue;
+      }
+
+      string += len + "" + offset;
+      if (result == null || string.length < result.length) {
+        result = string;
+      } else if (string.length == result.length && Math.random() < 0.5) {
+        result = string;
+      }
+    }
+  }
+
+  return result ?? "";
+}
+
+// decompress LZ-compressed string, or return null if input is invalid
+export function comprLZDecode(compr: string): string | null {
+  let plain = "";
+
+  for (let i = 0; i < compr.length; ) {
+    const literal_length = compr.charCodeAt(i) - 0x30;
+
+    if (literal_length < 0 || literal_length > 9 || i + 1 + literal_length > compr.length) {
+      return null;
+    }
+
+    plain += compr.substring(i + 1, i + 1 + literal_length);
+    i += 1 + literal_length;
+
+    if (i >= compr.length) {
+      break;
+    }
+    const backref_length = compr.charCodeAt(i) - 0x30;
+
+    if (backref_length < 0 || backref_length > 9) {
+      return null;
+    } else if (backref_length === 0) {
+      ++i;
+    } else {
+      if (i + 1 >= compr.length) {
+        return null;
+      }
+
+      const backref_offset = compr.charCodeAt(i + 1) - 0x30;
+      if ((backref_length > 0 && (backref_offset < 1 || backref_offset > 9)) || backref_offset > plain.length) {
+        return null;
+      }
+
+      for (let j = 0; j < backref_length; ++j) {
+        plain += plain[plain.length - backref_offset];
+      }
+
+      i += 2;
+    }
+  }
+
+  return plain;
+}