Merge pull request #3541 from stalefishies/compression-contracts

CODINGCONTRACT: Three new compression contracts
This commit is contained in:
hydroflame 2022-04-25 11:26:22 -04:00 committed by GitHub
commit 91223072dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 345 additions and 0 deletions

@ -1,6 +1,7 @@
import { getRandomInt } from "../utils/helpers/getRandomInt";
import { MinHeap } from "../utils/Heap";
import { comprGenChar, comprLZGenerate, comprLZEncode, comprLZDecode } from "../utils/CompressionContract";
import { HammingEncode, HammingDecode } from "../utils/HammingCodeTools";
/* tslint:disable:completed-docs no-magic-numbers arrow-return-shorthand */
@ -1456,4 +1457,155 @@ export const codingContractTypesMetadata: ICodingContractTypeMetadata[] = [
else return false;
},
},
{
  name: "Compression I: RLE Compression",
  difficulty: 2,
  numTries: 10,
  // Builds the problem statement shown to the player for the given plaintext.
  desc: (plaintext: string): string => {
    const lines = [
      "Run-length encoding (RLE) is a data compression technique which encodes data as a series of runs of",
      "a repeated single character. Runs are encoded as a length, followed by the character itself. Lengths",
      "are encoded as a single ASCII digit; runs of 10 characters or more are encoded by splitting them",
      "into multiple runs.\n\n",
      "You are given the following input string:\n",
      `    ${plaintext}\n`,
      "Encode it using run-length encoding with the minimum possible output length.\n\n",
      "Examples:\n",
      "    aaaaabccc            ->  5a1b3c\n",
      "    aAaAaA               ->  1a1A1a1A1a1A\n",
      "    111112333            ->  511233\n",
      "    zzzzzzzzzzzzzzzzzzz  ->  9z9z1z  (or 9z8z2z, etc.)\n",
    ];
    return lines.join(" ");
  },
  // Produces a random plaintext of 50-99 characters made of runs of varying length.
  gen: (): string => {
    const targetLength = 50 + Math.floor(25 * (Math.random() + Math.random()));

    // 30% single characters, 30% pairs, 30% runs of 0-9, 10% runs of 10-14
    const pickRunLength = (): number => {
      const roll = Math.random();
      if (roll < 0.3) return 1;
      if (roll < 0.6) return 2;
      if (roll < 0.9) return Math.floor(10 * Math.random());
      return 10 + Math.floor(5 * Math.random());
    };

    let text = "";
    while (text.length < targetLength) {
      // run length is drawn before the character, keeping the random sequence stable
      const runLength = pickRunLength();
      text += comprGenChar().repeat(runLength);
    }
    return text.substring(0, targetLength);
  },
  // Accepts an answer only if it decodes to the plaintext AND has the minimal possible length.
  solver: (plain: string, ans: string): boolean => {
    // a valid encoding is a sequence of (digit, character) pairs
    if (ans.length % 2 !== 0) return false;

    const pieces: string[] = [];
    for (let pos = 0; pos + 1 < ans.length; pos += 2) {
      const count = ans.charCodeAt(pos) - 0x30;
      if (count < 0 || count > 9) return false;
      pieces.push(ans[pos + 1].repeat(count));
    }
    if (pieces.join("") !== plain) return false;

    // each maximal run of r identical characters needs ceil(r / 9) two-character pairs
    let minimalLength = 0;
    let start = 0;
    while (start < plain.length) {
      let end = start + 1;
      while (end < plain.length && plain[end] === plain[start]) {
        ++end;
      }
      minimalLength += 2 * Math.ceil((end - start) / 9);
      start = end;
    }
    return ans.length === minimalLength;
  },
},
{
  name: "Compression II: LZ Decompression",
  difficulty: 4,
  numTries: 10,
  // Builds the problem statement shown to the player for the given compressed string.
  desc: (compressed: string): string => {
    const lines = [
      "Lempel-Ziv (LZ) compression is a data compression technique which encodes data using references to",
      "earlier parts of the data. In this variant of LZ, data is encoded in two types of chunk. Each chunk",
      "begins with a length L, encoded as a single ASCII digit from 1 - 9, followed by the chunk data,",
      "which is either:\n\n",
      "1. Exactly L characters, which are to be copied directly into the uncompressed data.\n",
      "2. A reference to an earlier part of the uncompressed data. To do this, the length is followed",
      "by a second ASCII digit X: each of the L output characters is a copy of the character X",
      "places before it in the uncompressed data.\n\n",
      "For both chunk types, a length of 0 instead means the chunk ends immediately, and the next character",
      "is the start of a new chunk. The two chunk types alternate, starting with type 1, and the final",
      "chunk may be of either type.\n\n",
      "You are given the following LZ-encoded string:\n",
      `&nbsp; &nbsp; ${compressed}\n`,
      "Decode it and output the original string.\n\n",
      "Example: decoding '5aaabc340533bca' chunk-by-chunk\n",
      "&nbsp; &nbsp; 5aaabc &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; -> &nbsp;aaabc\n",
      "&nbsp; &nbsp; 5aaabc34 &nbsp; &nbsp; &nbsp; &nbsp; -> &nbsp;aaabcaab\n",
      "&nbsp; &nbsp; 5aaabc340 &nbsp; &nbsp; &nbsp; &nbsp;-> &nbsp;aaabcaab\n",
      "&nbsp; &nbsp; 5aaabc34053 &nbsp; &nbsp; &nbsp;-> &nbsp;aaabcaabaabaa\n",
      "&nbsp; &nbsp; 5aaabc340533bca &nbsp;-> &nbsp;aaabcaabaabaabca",
    ];
    return lines.join(" ");
  },
  // The puzzle input is a freshly generated plaintext run through the reference encoder.
  gen: (): string => {
    const plaintext = comprLZGenerate();
    return comprLZEncode(plaintext);
  },
  // The answer is correct when it equals the reference decoding of the puzzle input.
  solver: (compr: string, ans: string): boolean => {
    const expected = comprLZDecode(compr);
    return ans === expected;
  },
},
{
  name: "Compression III: LZ Compression",
  difficulty: 10,
  numTries: 10,
  // Builds the problem statement shown to the player for the given plaintext.
  desc: (plaintext: string): string => {
    return [
      "Lempel-Ziv (LZ) compression is a data compression technique which encodes data using references to",
      "earlier parts of the data. In this variant of LZ, data is encoded in two types of chunk. Each chunk",
      "begins with a length L, encoded as a single ASCII digit from 1 - 9, followed by the chunk data,",
      "which is either:\n\n",
      "1. Exactly L characters, which are to be copied directly into the uncompressed data.\n",
      "2. A reference to an earlier part of the uncompressed data. To do this, the length is followed",
      "by a second ASCII digit X: each of the L output characters is a copy of the character X",
      "places before it in the uncompressed data.\n\n",
      "For both chunk types, a length of 0 instead means the chunk ends immediately, and the next character",
      "is the start of a new chunk. The two chunk types alternate, starting with type 1, and the final",
      "chunk may be of either type.\n\n",
      "You are given the following input string:\n",
      `&nbsp; &nbsp; ${plaintext}\n`,
      "Encode it using Lempel-Ziv encoding with the minimum possible output length.\n\n",
      "Examples (some have other possible encodings of minimal length):\n",
      "&nbsp; &nbsp; abracadabra &nbsp; &nbsp;-> &nbsp;7abracad47\n",
      "&nbsp; &nbsp; mississippi &nbsp; &nbsp;-> &nbsp;4miss433ppi\n",
      "&nbsp; &nbsp; aAAaAAaAaAA &nbsp; &nbsp;-> &nbsp;3aAA53035\n",
      "&nbsp; &nbsp; 2718281828 &nbsp; &nbsp; -> &nbsp;627182844\n",
      "&nbsp; &nbsp; abcdefghijk &nbsp; &nbsp;-> &nbsp;9abcdefghi02jk\n",
      "&nbsp; &nbsp; aaaaaaaaaaa &nbsp; &nbsp;-> &nbsp;1a911a\n",
      "&nbsp; &nbsp; aaaaaaaaaaaa &nbsp; -> &nbsp;1a912aa\n",
      "&nbsp; &nbsp; aaaaaaaaaaaaa &nbsp;-> &nbsp;1a91031",
    ].join(" ");
  },
  // The puzzle input is a random plaintext that is amenable to LZ encoding.
  gen: (): string => {
    return comprLZGenerate();
  },
  // An answer is correct when it decodes back to the plaintext and is no longer than
  // the reference encoder's output.
  solver: (plain: string, ans: string): boolean => {
    // Reject anything that is not a valid encoding of the plaintext.
    if (comprLZDecode(ans) !== plain) {
      return false;
    }
    // Compare with <= rather than ===: a valid encoding that is shorter than the
    // reference output is at least as good and must not be rejected.
    return ans.length <= comprLZEncode(plain).length;
  },
},
];

@ -0,0 +1,193 @@
/**
 * Chooses a random character for generating plaintexts to compress.
 *
 * The character class is picked first (40% uppercase letter, 40% lowercase
 * letter, 20% decimal digit), then a character is drawn uniformly from that
 * class. Exactly two calls to Math.random() are made per invocation.
 *
 * @returns A single alphanumeric ASCII character.
 */
export function comprGenChar(): string {
  const r = Math.random();
  if (r < 0.4) {
    return "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[Math.floor(26 * Math.random())];
  } else if (r < 0.8) {
    return "abcdefghijklmnopqrstuvwxyz"[Math.floor(26 * Math.random())];
  } else {
    // exactly ten digits, so that every digit (including '9') is equally likely
    return "0123456789"[Math.floor(10 * Math.random())];
  }
}
/**
 * Generates a plaintext which is amenable to LZ encoding.
 *
 * The text is grown until it reaches a random target length of 50-99
 * characters. Each step either appends one fresh random character (80%) or
 * repeats earlier output by copying 1-9 characters from 1-9 places back (20%).
 *
 * @returns The generated plaintext, truncated to the target length.
 */
export function comprLZGenerate(): string {
  const targetLength = 50 + Math.floor(25 * (Math.random() + Math.random()));
  let text = "";

  while (text.length < targetLength) {
    if (Math.random() < 0.8) {
      text += comprGenChar();
      continue;
    }

    const copyLength = 1 + Math.floor(9 * Math.random());
    const copyOffset = 1 + Math.floor(9 * Math.random());
    // an offset reaching before the start of the text cannot be copied; try again
    if (copyOffset <= text.length) {
      // copy one character at a time so the copy may overlap what it just wrote
      for (let k = 0; k < copyLength; ++k) {
        text += text[text.length - copyOffset];
      }
    }
  }

  return text.substring(0, targetLength);
}
/**
 * Compresses a plaintext string into a minimum-length LZ encoding.
 *
 * The encoding alternates literal chunks (a length digit followed by that many
 * characters) and backreference chunks (a length digit followed by an offset
 * digit), starting with a literal; a chunk of length 0 is written as a lone "0".
 *
 * The search is a dynamic programme over the plaintext. For every prefix it
 * keeps, per open chunk, the shortest output emitted so far *excluding* that
 * open chunk. Ties between equally short outputs are broken randomly, so
 * repeated calls may return different strings of the same length.
 *
 * @param plain The text to compress.
 * @returns An encoding of `plain`; the empty string when `plain` is empty.
 */
export function comprLZEncode(plain: string): string {
  // The search below assumes at least one character (it opens with a literal of
  // length 1), so the empty plaintext is handled here: its encoding is empty.
  if (plain.length === 0) {
    return "";
  }

  // for state[i][j]:
  //      if i is 0, we're adding a literal of length j
  //      else, we're adding a backreference of offset i and length j
  let cur_state: (string | null)[][] = Array.from(Array(10), () => Array<string | null>(10).fill(null));
  let new_state: (string | null)[][] = Array.from(Array(10), () => Array<string | null>(10).fill(null));

  // record str for state[i][j] if it is shorter than what is already stored there
  function set(state: (string | null)[][], i: number, j: number, str: string): void {
    const current = state[i][j];
    if (current == null || str.length < current.length) {
      state[i][j] = str;
    } else if (str.length === current.length && Math.random() < 0.5) {
      // if two strings are the same length, pick randomly so that
      // we generate more possible inputs to Compression II
      state[i][j] = str;
    }
  }

  // initial state is a literal of length 1
  cur_state[0][1] = "";

  for (let i = 1; i < plain.length; ++i) {
    for (const row of new_state) {
      row.fill(null);
    }
    const c = plain[i];

    // handle literals
    for (let length = 1; length <= 9; ++length) {
      const string = cur_state[0][length];
      if (string == null) {
        continue;
      }

      if (length < 9) {
        // extend current literal
        set(new_state, 0, length + 1, string);
      } else {
        // start new literal (separated by an empty backreference chunk)
        set(new_state, 0, 1, string + "9" + plain.substring(i - 9, i) + "0");
      }

      for (let offset = 1; offset <= Math.min(9, i); ++offset) {
        if (plain[i - offset] === c) {
          // start new backreference
          set(new_state, offset, 1, string + length + plain.substring(i - length, i));
        }
      }
    }

    // handle backreferences
    for (let offset = 1; offset <= 9; ++offset) {
      for (let length = 1; length <= 9; ++length) {
        const string = cur_state[offset][length];
        if (string == null) {
          continue;
        }

        if (plain[i - offset] === c) {
          if (length < 9) {
            // extend current backreference
            set(new_state, offset, length + 1, string);
          } else {
            // start new backreference with the same offset
            set(new_state, offset, 1, string + "9" + offset + "0");
          }
        }

        // start new literal
        set(new_state, 0, 1, string + length + offset);

        // end current backreference and start a new one (possibly with a different
        // offset), separated by an empty literal chunk; without this transition
        // encodings such as 3aAA53035 are unreachable and the result is not minimal
        for (let new_offset = 1; new_offset <= Math.min(9, i); ++new_offset) {
          if (plain[i - new_offset] === c) {
            set(new_state, new_offset, 1, string + length + offset + "0");
          }
        }
      }
    }

    // swap the buffers; the old current state is cleared at the top of the next pass
    const tmp_state = new_state;
    new_state = cur_state;
    cur_state = tmp_state;
  }

  // close whichever chunk is still open and keep the shortest complete output
  let result: string | null = null;

  for (let len = 1; len <= 9; ++len) {
    let string = cur_state[0][len];
    if (string == null) {
      continue;
    }

    string += len + plain.substring(plain.length - len, plain.length);
    if (result == null || string.length < result.length) {
      result = string;
    } else if (string.length === result.length && Math.random() < 0.5) {
      result = string;
    }
  }

  for (let offset = 1; offset <= 9; ++offset) {
    for (let len = 1; len <= 9; ++len) {
      let string = cur_state[offset][len];
      if (string == null) {
        continue;
      }

      string += len + "" + offset;
      if (result == null || string.length < result.length) {
        result = string;
      } else if (string.length === result.length && Math.random() < 0.5) {
        result = string;
      }
    }
  }

  return result ?? "";
}
/**
 * Decompresses an LZ-compressed string.
 *
 * The input alternates literal chunks (length digit + that many characters)
 * and backreference chunks (length digit + offset digit), starting with a
 * literal. A length of 0 ends the chunk immediately.
 *
 * @param compr The compressed text.
 * @returns The decoded plaintext, or null if the input is invalid.
 */
export function comprLZDecode(compr: string): string | null {
  const ZERO = 0x30; // character code of "0"
  const total = compr.length;
  let output = "";
  let pos = 0;

  while (pos < total) {
    // literal chunk: the whole literal must fit inside the input
    const literalLen = compr.charCodeAt(pos) - ZERO;
    const literalEnd = pos + 1 + literalLen;
    if (literalLen < 0 || literalLen > 9 || literalEnd > total) {
      return null;
    }
    output += compr.slice(pos + 1, literalEnd);
    pos = literalEnd;

    // the input may legitimately end after a literal chunk
    if (pos >= total) {
      break;
    }

    // backreference chunk
    const refLen = compr.charCodeAt(pos) - ZERO;
    if (refLen < 0 || refLen > 9) {
      return null;
    }
    if (refLen === 0) {
      // empty backreference: no offset digit follows
      pos += 1;
      continue;
    }

    if (pos + 1 >= total) {
      return null;
    }
    const refOffset = compr.charCodeAt(pos + 1) - ZERO;
    if (refOffset < 1 || refOffset > 9 || refOffset > output.length) {
      return null;
    }
    // copy one character at a time so the reference may overlap its own output
    for (let copied = 0; copied < refLen; ++copied) {
      output += output[output.length - refOffset];
    }
    pos += 2;
  }

  return output;
}