# CSS Reftest Test: Segment Break Transformation Rules

#!/usr/bin/env python
# - * - coding: UTF-8 - * -

"""
This script generates tests segment-break-transformation-rules-001 ~ 049 which
cover all possible combinations of characters at two sides of segment breaks.
More specifically, there are seven types of characters involved in these rules:

1. East Asian Full-width (F)
2. East Asian Half-width (H)
3. East Asian Wide (W) except Hangul
4. East Asian Narrow (Na)
5. East Asian Ambiguous (A)
6. Not East Asian (Neutral)
7. Hangul

So there are 49 different combinations. It outputs a list of all
tests it generated in the format of Mozilla reftest.list to the stdout.
"""

# File name pattern of a generated test; formatted with the 1-based test index.
TEST_FILE = 'segment-break-transformation-rules-{:03}.html'

# Content of a generated test.  It is formatted with the keyword arguments
# index, prev, next, prevchar, nextchar and expect; str.format() ignores the
# ones that have no placeholder in the text.
# NOTE(review): the template text visible here carries no HTML markup and has
# a plain space between {prevchar} and {nextchar}.  A segment-break test would
# need a line feed at that position -- confirm against the upstream template.
TEST_TEMPLATE = ''' CSS Reftest Test: Segment Break Transformation Rules

Pass if there is {expect} white space between the two strings below.

{prevchar} {nextchar}

'''

# File name pattern of a generated reference; formatted with the test index.
REF_FILE = 'segment-break-transformation-rules-{:03}-ref.html'

# Reference used when the segment break must be removed: the two strings
# ({0} and {1}) are rendered back to back.
REF_TEMPLATE_REMOVE = ''' CSS Reftest Reference: Segment Break Transformation Rules

Pass if there is NO white space between the two strings below.

{0}{1}

'''

# Reference used when the segment break must become a space: {2} is the
# separator placed between the two strings ({0} and {1}).
REF_TEMPLATE_KEEP = ''' CSS Reftest Reference: Segment Break Transformation Rules

Pass if there is ONE white space between the two strings below.

{0}{2}{1}

'''

# (description, sample text) for every character type.  The ORDER MATTERS:
# the first REMOVABLE_TYPE_COUNT entries are the types (F, H, W except
# Hangul) around which a segment break is removed.
CHAR_SET = [
    ('East Asian Full-width (F)', 'ＦＵＬＬＷＩＤＴＨ'),
    ('East Asian Half-width (H)', 'ﾃｽﾄ'),
    ('East Asian Wide (W) except Hangul', '測試'),
    ('East Asian Narrow (Na)', 'narrow'),
    ('East Asian Ambiguous (A)', '■'),
    ('Not East Asian (Neutral)', 'آزمون'),
    ('Hangul', '테스트'),
]

# Number of leading CHAR_SET entries that allow the segment break to be
# removed (see the ordering note on CHAR_SET).
REMOVABLE_TYPE_COUNT = 3


def write_file(filename, content):
    """Write the text *content* to *filename*, encoded as UTF-8.

    The file is opened in binary mode and the text is encoded explicitly, so
    the bytes written do not depend on the platform's default encoding or
    newline translation.  An existing file is overwritten.
    """
    with open(filename, 'wb') as f:
        f.write(content.encode('UTF-8'))


def _segment_break_removed(prev_index, next_index):
    """Return True if the break between the two CHAR_SET entries is removed.

    According to CSS Text 3 - 4.1.2. Segment Break Transformation Rules,
    if the East Asian Width property of both the character before and
    after the segment break is F, W, or H (not A), and neither side is
    Hangul, then the segment break is removed. Otherwise, the segment
    break is converted to a space (U+0020).
    """
    return (prev_index < REMOVABLE_TYPE_COUNT
            and next_index < REMOVABLE_TYPE_COUNT)


print("# START tests from {}".format(__file__))

# 1-based running index of the generated test, used in the file names.
idx = 0
for i, (prevtype, prevchars) in enumerate(CHAR_SET):
    for j, (nexttype, nextchars) in enumerate(CHAR_SET):
        idx += 1
        reffilename = REF_FILE.format(idx)
        testfilename = TEST_FILE.format(idx)

        # Only the reference content and the wording of the expectation
        # depend on the rule; the test file itself is produced the same way.
        if _segment_break_removed(i, j):
            refcontent = REF_TEMPLATE_REMOVE.format(prevchars, nextchars)
            expect = 'NO'
        else:
            refcontent = REF_TEMPLATE_KEEP.format(prevchars, nextchars, ' ')
            expect = 'ONE'

        write_file(reffilename, refcontent)
        write_file(testfilename,
                   TEST_TEMPLATE.format(index=idx,
                                        prev=prevtype,
                                        next=nexttype,
                                        prevchar=prevchars,
                                        nextchar=nextchars,
                                        expect=expect))

        # One reftest.list entry per generated test/reference pair.
        print("== {} {}".format(testfilename, reffilename))

print("# END tests from {}".format(__file__))