Перекрывающиеся фрагменты таблицы


Один из моих проектов является табличным сканер/парсер генератор, в стремлении уменьшить размер переходной таблице я представляю сформированную таблицу в[Х, Г] А , А'[х + я[г]] вместо А[х + г * с]. Это позволяет мне перекрыть одинаковые части таблицы.

Ниже приведен код, который отвечает за организацию строк. Я не очень счастлива с ним, в частности, ряд массивов, которые должны быть построены. Любые улучшения читабельности/производительность/эффективность будет высоко ценится.

/// <remarks>
/// A two dimentional array, or table, can be represented by a single dimentional table
/// where the data for position (x, y) is stored at index x + table_width * y. If the
/// table is read-only then the multiplication can be replaced with an index lookup
/// allowing identical portions of the table to be overlapped thus (depending on the
/// data) reducing the memory footprint of the table.
/// 
/// The purpose of this class is to take a bunch of rows and try to find an arrangement
/// with a good approximation of the minimal space required. Finding the optimal solution
/// reduces to the "Traveling Sailsman Problem", so a good approximation is sufficient.
/// 
/// This particular algorithm is intended to work with the characteristics of the sparse
/// state transition tables of the lexer/parser and will probably perform very poorly
/// for other cases.
/// 
/// Algorithm Overview:
///
/// * If two fragments with the same content are encountered, they are combined with
///   identical offsets.
///
/// * Each fragment gets two tags, one for each end. all start tags are put into one list
///   and all end tags in another.
///
/// * Sort both lists first by "end value" (the very first value for a start tag and the
///   very last value for an end tag) and second by "end length" (the number of
///   consecutive times "end value" appears) in descending order.
///
/// * Make a single pass over both lists at the same time, combining distinct fragments
///   with the longest sequence of an identical value.
///
/// * All remaining fragments are appended end to end.
/// </remarks>
[DebuggerDisplay("Count = {Count}")]
internal sealed class TableFragment : IList<int>, ICollection
{
    public TableFragment(int[] values, int state)
        : this(values, new int[] { state }, new int[] { 0 }, 0) { }

    public TableFragment(int[] values, int state, int skip)
        : this(values, new int[] { state }, new int[] { -skip }, skip) { }

    TableFragment(int[] values, int[] states, int[] offsets, int skip)
    {
        this._skip = skip;
        this._values = values;
        this._states = states;
        this._offsets = offsets;
    }

    /// <summary>
    /// Attempt to combine multiple TableFragments into a single fragment.
    /// </summary>
    /// <remarks>
    /// This method attempts to minimise the total length of the result by adjusting the
    /// offsets so that identical portions overlap.
    /// </remarks>
    public static TableFragment Combine(IList<TableFragment> fragments)
    {
        if (fragments == null) throw new ArgumentNullException("fragments");
        if (fragments.Count == 0) throw new ArgumentException("fragments list cannot be empty", "fragments");

        List<TableFragmentTag> startTags = new List<TableFragmentTag>(fragments.Count);
        List<TableFragmentTag> endTags = new List<TableFragmentTag>(fragments.Count);

        Dictionary<int, List<TableFragmentTag>> map = new Dictionary<int, List<TableFragmentTag>>();

        for (int i = 0; i < fragments.Count; i++)
        {
            TableFragmentTag startTag;
            TableFragmentTag endTag;
            int hash;

            CreateTags(fragments[i], out startTag, out endTag, out hash);

            List<TableFragmentTag> list;

            if (!map.TryGetValue(hash, out list))
            {
                list = new List<TableFragmentTag>();
                list.Add(startTag);
                map.Add(hash, list);

                startTags.Add(startTag);
                endTags.Add(endTag);
            }
            else if (!AddEquivilenceList(list, startTag))
            {
                startTags.Add(startTag);
                endTags.Add(endTag);
            }
        }

        startTags.Sort(Comparison);
        endTags.Sort(Comparison);

        LinkedList<TableFragmentTag> startFragments = new LinkedList<TableFragmentTag>(startTags);
        LinkedList<TableFragmentTag> endFragments = new LinkedList<TableFragmentTag>(endTags);

        return Combine(startFragments, endFragments);
    }

    /// <summary>
    /// Indicates the number of "don't care" elements between the offset origin and the first value.
    /// </summary>
    /// <remarks>
    /// This is useful if it is known that the first N elements can never be used, and as such can have
    /// any value.
    /// </remarks>
    public int Skip
    {
        [DebuggerStepThrough]
        get { return _skip; }
    }

    public int this[int index]
    {
        [DebuggerStepThrough]
        get { return _values[index]; }
    }

    /// <summary>
    /// Gets the offset into this fragment where the values associated with the given state can be found.
    /// </summary>
    /// <remarks>
    /// Returns null if the state was never combined into this fragment.
    /// </remarks>
    public int? GetOffset(int state)
    {
        int index = Array.BinarySearch(_states, state);
        return index < 0 ? new int?() : _offsets[index];
    }

    static void CreateTags(TableFragment fragment, out TableFragmentTag startTag, out TableFragmentTag endTag, out int hash)
    {
        if (fragment == null) throw new ArgumentNullException("fragment");

        int[] values = fragment._values;

        int startIndex = 0;
        int startValue = values[0];
        int endIndex = 0;
        int endValue = values[values.Length - 1];
        uint currentHash = 0;
        bool startSet = false;

        for (int i = 0; i < values.Length; i++)
        {
            int val = values[i];

            if (!startSet && val != startValue)
            {
                startSet = true;
                startIndex = i;
            }

            if (val != endValue)
            {
                endIndex = i;
            }

            currentHash = unchecked(((currentHash >> 5) | (currentHash << 27)) ^ (uint)val);
        }

        startTag = new TableFragmentTag()
        {
            Fragment = fragment,
            EndLen = startSet ? startIndex : values.Length,
            EndValue = startValue,
        };

        endTag = new TableFragmentTag()
        {
            Fragment = fragment,
            EndLen = startSet ? values.Length - endIndex - 1 : values.Length,
            EndValue = endValue,
        };

        startTag.Partner = endTag;
        endTag.Partner = startTag;
        hash = (int)currentHash;
    }

    /// <summary>
    /// Replace the TableFragments referenced by the two tags with a single fragment that
    /// contains their combined values.
    /// </summary>
    /// <remarks>
    /// The end of tag1 will be made to overlap the start of tag2 as far as possible.
    /// </remarks>
    static void Combine(TableFragmentTag tag1, TableFragmentTag tag2)
    {
        int overlap;

        if (tag1.EndValue != tag2.EndValue)
        {
            overlap = 0;
        }
        else
        {
            overlap = Math.Min(tag1.EndLen, tag2.EndLen);
        }

        TableFragment fragment = Combine(tag1.Fragment, tag2.Fragment, overlap);

        tag1.Partner.Fragment = fragment;
        tag2.Partner.Fragment = fragment;
        tag1.Partner.Partner = tag2.Partner;
        tag2.Partner.Partner = tag1.Partner;
    }

    static TableFragment Combine(TableFragment fragment1, TableFragment fragment2, int overlap)
    {
        int[] values1 = fragment1._values;
        int[] values2 = fragment2._values;
        int[] states1 = fragment1._states;
        int[] states2 = fragment2._states;
        int[] offsets1 = fragment1._offsets;
        int[] offsets2 = fragment2._offsets;

        int offset = values1.Length - overlap;
        int[] values = new int[offset + values2.Length];
        int[] states = new int[states1.Length + states2.Length];
        int[] offsets = new int[states.Length];

        Array.Copy(values1, values, offset);
        Array.Copy(values2, 0, values, offset, values2.Length);

        int read1 = 0;
        int read2 = 0;
        int write = 0;

        while (read1 < states1.Length && read2 < states2.Length)
        {
            int diff = states1[read1] - states2[read2];

            if (diff == 0)
                throw new InvalidOperationException("attempting to combine 2 fragments that both contain the state " + states1[read1]);

            if (diff < 0)
            {
                states[write] = states1[read1];
                offsets[write] = offsets1[read1];
                write++;
                read1++;
            }
            else
            {
                states[write] = states2[read2];
                offsets[write] = offsets2[read2] + offset;
                write++;
                read2++;
            }
        }

        while (read1 < states1.Length)
        {
            states[write] = states1[read1];
            offsets[write] = offsets1[read1];
            write++;
            read1++;
        }

        while (read2 < states2.Length)
        {
            states[write] = states2[read2];
            offsets[write] = offsets2[read2] + offset;
            write++;
            read2++;
        }

        return new TableFragment(values, states, offsets, Math.Max(fragment1.Skip, fragment2.Skip - offset));
    }

    static TableFragment Combine(LinkedList<TableFragmentTag> startFragmentTags, LinkedList<TableFragmentTag> endFragmentTags)
    {
        LinkedList<TableFragmentTag> finalFragments = new LinkedList<TableFragmentTag>();

        while (startFragmentTags.Count > 0 && endFragmentTags.Count > 0)
        {
            LinkedListNode<TableFragmentTag> tailNode = startFragmentTags.First;
            LinkedListNode<TableFragmentTag> leadNode = endFragmentTags.First;

            if (tailNode.Value.Fragment == leadNode.Value.Fragment)
            {
                // if the tailNode and leadNode reference the same fragment, then try to replace one
                // of them as bad things happen when overlapping a fragment with itself.

                if (leadNode.Next != null)
                {
                    leadNode = leadNode.Next;
                }
                else if (tailNode.Next != null)
                {
                    tailNode = tailNode.Next;
                }
                else
                {
                    startFragmentTags.Remove(tailNode);
                    finalFragments.AddLast(tailNode);
                    break;
                }
            }

            int leadEnd = leadNode.Value.EndValue;
            int tailStart = tailNode.Value.EndValue;

            if (leadEnd < tailStart)
            {
                endFragmentTags.Remove(leadNode);
            }
            else if (tailStart < leadEnd)
            {
                startFragmentTags.Remove(tailNode);
                finalFragments.AddLast(tailNode);
            }
            else
            {
                endFragmentTags.Remove(leadNode);
                startFragmentTags.Remove(tailNode);
                TableFragment.Combine(leadNode.Value, tailNode.Value);
            }
        }

        while (startFragmentTags.Count > 0)
        {
            LinkedListNode<TableFragmentTag> node = startFragmentTags.First;
            startFragmentTags.Remove(node);
            finalFragments.AddLast(node);
        }

        return ForceCombine(finalFragments);
    }

    /// <summary>
    /// Append all fragments end to end without any attempt to overlap any of them.
    /// </summary>
    /// <remarks>
    /// Used as a last resort for when all attempts to overlap the fragments have failed.
    /// </remarks>
    static TableFragment ForceCombine(LinkedList<TableFragmentTag> finalFragments)
    {
        int totalLen = 0;
        int totalStates = 0;

        foreach (TableFragmentTag tag in finalFragments)
        {
            TableFragment fragment = tag.Fragment;
            totalLen += fragment._values.Length;
            totalStates += fragment._states.Length;
        }

        int newSkip = 0;
        int[] newValues = new int[totalLen];
        int[] newStates = new int[totalStates];
        int[] newOffsets = new int[totalStates];

        totalLen = 0;
        totalStates = 0;

        foreach (TableFragmentTag tag in finalFragments)
        {
            TableFragment fragment = tag.Fragment;
            int[] values = fragment._values;
            int[] states = fragment._states;
            int[] offsets = fragment._offsets;

            newSkip = Math.Max(newSkip, tag.Fragment.Skip - totalLen);

            Array.Copy(states, 0, newStates, totalStates, states.Length);
            Array.Copy(values, 0, newValues, totalLen, values.Length);

            for (int i = 0; i < states.Length; i++)
            {
                newOffsets[i + totalStates] = offsets[i] + totalLen;
            }

            totalLen += values.Length;
            totalStates += states.Length;
        }

        Array.Sort(newStates, newOffsets);

        return new TableFragment(newValues, newStates, newOffsets, newSkip);
    }

    /// <remarks>
    /// check for fragments with the same values, if one is found then combine them, otherwise add startTag to the list.
    /// </remarks>
    static bool AddEquivilenceList(List<TableFragmentTag> list, TableFragmentTag startTag)
    {
        for (int i = 0; i < list.Count; i++)
        {
            TableFragmentTag existingTag = list[i];
            TableFragment existing = existingTag.Fragment;
            TableFragment fragment = startTag.Fragment;

            if (ValuesEquivilent(existing._values, fragment._values))
            {
                existingTag.Partner.Fragment = existingTag.Fragment = Combine(existing, fragment, existing.Count);
                return true;
            }
        }

        list.Add(startTag);
        return false;
    }

    static bool ValuesEquivilent(int[] values1, int[] values2)
    {
        if (values1.Length != values2.Length) return false;

        for (int i = 0; i < values1.Length; i++)
        {
            if (values1[i] != values2[i]) return false;
        }

        return true;
    }

    static int Comparison(TableFragmentTag tag1, TableFragmentTag tag2)
    {
        int diff = tag1.EndValue - tag2.EndValue;
        return diff != 0 ? diff : tag2.EndLen - tag1.EndLen;
    }

    #region IList<int> Members

    // ...

    #endregion

    #region ICollection<int> Members

    public void CopyTo(int[] array, int arrayIndex)
    {
        Array.Copy(_values, 0, array, arrayIndex, _values.Length);
    }

    // ...

    #endregion

    #region ICollection Members

    // ...

    #endregion

    #region IEnumerable<int> Members

    // ...

    #endregion

    #region IEnumerable Members

    // ...

    #endregion

    /// <summary>
    /// The TableFragmentTag class is a reference to one end of a TableFragment, either the start or the end.
    /// </summary>
    [DebuggerDisplay("{EndValue} x {EndLen}")]
    sealed class TableFragmentTag
    {
        /// <summary>
        /// The Fragment this tag references.
        /// </summary>
        public TableFragment Fragment { get; set; }

        /// <summary>
        /// The tag for the other end of the fragment.
        /// </summary>
        public TableFragmentTag Partner { get; set; }

        /// <summary>
        /// The value at this end of the fragment.
        /// </summary>
        public int EndValue { get; set; }

        /// <summary>
        /// The number of consecitive times EndValue appears.
        /// </summary>
        public int EndLen { get; set; }
    }

    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    readonly int _skip;
    readonly int[] _values;
    readonly int[] _states;
    readonly int[] _offsets;
}

И вот простой пример, как можно использовать.

int[][] table = GenerateTable();

TableFragment[] fragments = new TableFragment[table.Length];

for (int state = 0; state < table.Length; state++)
{
    fragments[state] = new TableFragment(table[state], state);
}

TableFragment result = TableFragment.Combine(fragments);

int[] A = new int[result.Count + result.Skip];
result.CopyTo(A, result.Skip);

int[] I = new int[table.Length];

for (int state = 0; state < table.Length; state++)
{
    I[state] = result.GetOffset(state).Value;
}


260
5
c#
задан 16 марта 2011 в 11:03 Источник Поделиться
Комментарии
1 ответ

Читал в течение последних нескольких дней о чистый код и я могу немного сложности в вашем. Просто некоторые вещи, которые я думаю, что бы улучшить читабельность:

Несколько аналогичных блоков, как :

        states[write] = states1[read1];
offsets[write] = offsets1[read1];
write++;
read1++;

если это может быть преобразована в функцию, это будет означать + для удобочитаемости.

Потому что иначе, если я хотел заменить

      if (diff < 0)
{
states[write] = states1[read1];
---
}
else if (diff > 0)
{
states[write] = states2[read2];
---
}
else
{
throw new InvalidOperationException ....
}

с:

if (diff == 0)
throw new ....

if (diff > 0)
{
...
}
else
{
...
}

Это также выглядит, как дублировать код:

    while (read1 < states1.Length)
{
states[write] = states1[read1];
offsets[write] = offsets1[read1];
write++;
read1++;
}

while (read2 < states2.Length)
{
states[write] = states2[read2];
offsets[write] = offsets2[read2] + offset;
write++;
read2++;
}

4
ответ дан 16 марта 2011 в 05:03 Источник Поделиться