From 543235b805d73b30f778255415ff2ec0d64b4334 Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Thu, 24 Jun 2010 13:39:50 -0700 Subject: Added support for whitespace ignoring JGit did not have support for skipping whitespace when comparing lines in RawText objects. I added a subclass of RawText that skips whitespace in its equals and hashCode methods. I used a subclass rather than adding functionality into RawText so that performance would not be impacted by extra logic. This class only supports ignoring all whitespace. Others will follow that allow other forms of whitespace ignoring. Change-Id: Ic2f79e85215e48d3fd53ec1b4ad13373dd183a4a --- .../jgit/diff/RawTextIgnoreAllWhitespaceTest.java | 96 +++++++++++++++++ .../tst/org/eclipse/jgit/util/RawCharUtilTest.java | 108 +++++++++++++++++++ .../jgit/diff/RawTextIgnoreAllWhitespace.java | 120 +++++++++++++++++++++ .../src/org/eclipse/jgit/util/RawCharUtil.java | 116 ++++++++++++++++++++ 4 files changed, 440 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java new file mode 100644 index 0000000000..5e1a238a72 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2009-2010, Google Inc. + * Copyright (C) 2009, Johannes E. Schindelin + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.lib.Constants; + +import junit.framework.TestCase; + +public class RawTextIgnoreAllWhitespaceTest extends TestCase { + public void testEqualsWithoutWhitespace() { + final RawText a = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-a\nfoo-b\nfoo\n")); + final RawText b = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-b\nfoo-c\nf\n")); + + assertEquals(3, a.size()); + assertEquals(3, b.size()); + + // foo-a != foo-b + assertFalse(a.equals(0, b, 0)); + assertFalse(b.equals(0, a, 0)); + + // foo-b == foo-b + assertTrue(a.equals(1, b, 0)); + assertTrue(b.equals(0, a, 1)); + + // foo != f + assertFalse(a.equals(2, b, 2)); + assertFalse(b.equals(2, a, 2)); + } + + public void testEqualsWithWhitespace() { + final RawText a = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-a\n \n a b c\na \n")); + final RawText b = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-a b\n\nab c\na\n")); + + // "foo-a" != "foo-a b" + assertFalse(a.equals(0, b, 0)); + assertFalse(b.equals(0, a, 0)); + + // " " == "" + assertTrue(a.equals(1, b, 1)); + assertTrue(b.equals(1, a, 1)); + + // " a b c" == "ab c" + assertTrue(a.equals(2, b, 2)); + assertTrue(b.equals(2, a, 2)); + + // "a " == "a" + assertTrue(a.equals(3, b, 3)); + assertTrue(b.equals(3, a, 3)); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java new file mode 100644 index 0000000000..6747b26adc --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.util; + +import java.io.UnsupportedEncodingException; + +import junit.framework.TestCase; +import static org.eclipse.jgit.util.RawCharUtil.isWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace; + +public class RawCharUtilTest extends TestCase { + + /** + * Test method for {@link RawCharUtil#isWhitespace(byte)}. + */ + public void testIsWhitespace() { + for (byte c = -128; c < 127; c++) { + switch (c) { + case (byte) '\r': + case (byte) '\n': + case (byte) '\t': + case (byte) ' ': + assertTrue(isWhitespace(c)); + break; + default: + assertFalse(isWhitespace(c)); + } + } + } + + /** + * Test method for + * {@link RawCharUtil#trimTrailingWhitespace(byte[], int, int)}. + * + * @throws UnsupportedEncodingException + */ + public void testTrimTrailingWhitespace() + throws UnsupportedEncodingException { + assertEquals(0, trimTrailingWhitespace("".getBytes("US-ASCII"), 0, 0)); + assertEquals(0, trimTrailingWhitespace(" ".getBytes("US-ASCII"), 0, 1)); + assertEquals(1, trimTrailingWhitespace("a ".getBytes("US-ASCII"), 0, 2)); + assertEquals(2, + trimTrailingWhitespace(" a ".getBytes("US-ASCII"), 0, 3)); + assertEquals(3, + trimTrailingWhitespace(" a".getBytes("US-ASCII"), 0, 3)); + assertEquals(6, trimTrailingWhitespace( + " test ".getBytes("US-ASCII"), 2, 9)); + } + + /** + * Test method for + * {@link RawCharUtil#trimLeadingWhitespace(byte[], int, int)}. 
+ * + * @throws UnsupportedEncodingException + */ + public void testTrimLeadingWhitespace() throws UnsupportedEncodingException { + assertEquals(0, trimLeadingWhitespace("".getBytes("US-ASCII"), 0, 0)); + assertEquals(1, trimLeadingWhitespace(" ".getBytes("US-ASCII"), 0, 1)); + assertEquals(0, trimLeadingWhitespace("a ".getBytes("US-ASCII"), 0, 2)); + assertEquals(1, trimLeadingWhitespace(" a ".getBytes("US-ASCII"), 0, 3)); + assertEquals(2, trimLeadingWhitespace(" a".getBytes("US-ASCII"), 0, 3)); + assertEquals(2, trimLeadingWhitespace(" test ".getBytes("US-ASCII"), + 2, 9)); + } + +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java new file mode 100644 index 0000000000..f72259605e --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2009-2010, Google Inc. + * Copyright (C) 2008-2009, Johannes E. Schindelin + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. 
nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import static org.eclipse.jgit.util.RawCharUtil.isWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; + +/** + * A version of {@link RawText} that ignores all whitespace. + */ +public class RawTextIgnoreAllWhitespace extends RawText { + + /** + * Create a new sequence from an existing content byte array. + *

+ * The entire array (indexes 0 through length-1) is used as the content. + * + * @param input + * the content array. The array is never modified, so passing + * through cached arrays is safe. + */ + public RawTextIgnoreAllWhitespace(byte[] input) { + super(input); + } + + @Override + public boolean equals(final int i, final Sequence other, final int j) { + return equals(this, i + 1, (RawText) other, j + 1); + } + + private static boolean equals(final RawText a, final int ai, + final RawText b, final int bi) { + if (a.hashes.get(ai) != b.hashes.get(bi)) + return false; + + int as = a.lines.get(ai); + int bs = b.lines.get(bi); + int ae = a.lines.get(ai + 1); + int be = b.lines.get(bi + 1); + + ae = trimTrailingWhitespace(a.content, as, ae); + be = trimTrailingWhitespace(b.content, bs, be); + + while (as < ae && bs < be) { + byte ac = a.content[as]; + byte bc = b.content[bs]; + + while (as < ae - 1 && isWhitespace(ac)) { + as++; + ac = a.content[as]; + } + + while (bs < be - 1 && isWhitespace(bc)) { + bs++; + bc = b.content[bs]; + } + + if (ac != bc) + return false; + + as++; + bs++; + } + + return as == ae && bs == be; + } + + @Override + protected int hashLine(final byte[] raw, int ptr, final int end) { + int hash = 5381; + for (; ptr < end; ptr++) { + byte c = raw[ptr]; + if (!isWhitespace(c)) + hash = (hash << 5) ^ (c & 0xff); + } + return hash; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java new file mode 100644 index 0000000000..9b4e542ef3 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util; + +/** + * Utility class for character functions on raw bytes + *

+ * Characters are assumed to be 8-bit US-ASCII. + */ +public class RawCharUtil { + private static final boolean[] WHITESPACE = new boolean[256]; + + static { + WHITESPACE['\r'] = true; + WHITESPACE['\n'] = true; + WHITESPACE['\t'] = true; + WHITESPACE[' '] = true; + } + + /** + * Determine if an 8-bit US-ASCII encoded character represents whitespace + * + * @param c + * the 8-bit US-ASCII encoded character + * @return true if c represents a whitespace character in 8-bit US-ASCII + */ + public static boolean isWhitespace(byte c) { + return WHITESPACE[c & 0xff]; + } + + /** + * Returns the new end point for the byte array passed in after trimming any + * trailing whitespace characters, as determined by the isWhitespace() + * function. start and end are assumed to be within the bounds of raw. + * + * @param raw + * the byte array containing the portion to trim whitespace for + * @param start + * the start of the section of bytes + * @param end + * the end of the section of bytes + * @return the new end point + */ + public static int trimTrailingWhitespace(byte[] raw, int start, int end) { + int ptr = end - 1; + while (start <= ptr && isWhitespace(raw[ptr])) + ptr--; + + return ptr + 1; + } + + /** + * Returns the new start point for the byte array passed in after trimming + * any leading whitespace characters, as determined by the isWhitespace() + * function. start and end are assumed to be within the bounds of raw. + * + * @param raw + * the byte array containing the portion to trim whitespace for + * @param start + * the start of the section of bytes + * @param end + * the end of the section of bytes + * @return the new start point + */ + public static int trimLeadingWhitespace(byte[] raw, int start, int end) { + while (start < end && isWhitespace(raw[start])) + start++; + + return start; + } + + private RawCharUtil() { + // This will never be called + } +} -- cgit v1.2.3