JAVA调优:字符串操作性能比较

旧文搬迁:2008-5-27

字符串处理代码的效率与编译器对代码的优化算法是相关的。

1、字符串与常量的连接

1
2
3
// Every operand of this concatenation is a literal.
String str = "abc" + "123" + 456;
// After optimization by the Java compiler, the executed code is equivalent to:
String str = "abc123456";

2、字符串与变量的连接

1
2
3
4
5
6
7
(1)
// Case (1): a non-empty string literal concatenated with an int variable.
int i = 123;
String str = "abc" + i;

(2)
// Case (2): an empty string literal concatenated with an int variable.
int i = 123;
String str = "" + i;

字符串之间的运算符 + 的实现,实际上是通过新建一个StringBuffer来实现的(这是本文所用编译器的结果;JDK 5 ~ 8 的 javac 会改用非同步的 StringBuilder,JDK 9 起默认改用 invokedynamic,具体实现随编译器版本而不同)。

编译之后,实际运行的等效代码如下:

1
2
3
4
5
6
7
8
9
10
11
//(1)
// Equivalent of: String str = "abc" + i;
int i = 123;
// The buffer is seeded with the leading literal, then the variable is appended.
StringBuffer sbuff = new StringBuffer("abc");
sbuff.append(i);
String str = sbuff.toString();

//(2)
// Equivalent of: String str = "" + i;
int i = 123;
// The leading literal is empty, so the buffer is created with the no-arg constructor.
StringBuffer sbuff = new StringBuffer();
sbuff.append(i);
String str = sbuff.toString();

所以,类似(2)的代码应该使用 String.valueOf(i) 替代,避免创建StringBuffer对象。

1
2
3
4
//(3)
// Case (3): one expression that mixes two literals and two int variables.
int ia = 123;
int ib = 456;
String str = "abc" + ia + "def" + ib;

等同于:

1
2
3
4
5
6
7
8
//(3)
// Equivalent of: String str = "abc" + ia + "def" + ib;
int ia = 123;
int ib = 456;
// A single buffer serves the whole expression; each further operand is one append call.
StringBuffer sbuff = new StringBuffer("abc");
sbuff.append(ia);
sbuff.append("def");
sbuff.append(ib);
String str = sbuff.toString();

事实上,每个字符串连接的表达式会且仅会创建一个StringBuffer对象。 例:

1
2
3
4
5
//(4)
// Case (4): the concatenation is split across two separate + expressions.
int ia = 123;
String str = "abc" + ia;
int ib = 456;
// Second expression: the previous result is the left operand.
str = str + ib;

编译后执行的等价代码是:

1
2
3
4
5
6
7
8
9
10
11
//(4)
// Equivalent of the two-statement form:
//   String str = "abc" + ia;
//   str = str + ib;
int ia = 123;
// First expression ("abc" + ia): one StringBuffer and one resulting String.
StringBuffer sbuff = new StringBuffer("abc");
sbuff.append(ia);
String str = sbuff.toString();

int ib = 456;
// Second expression (str + ib): a second, separate StringBuffer seeded with the
// previous result. Only ib is appended, because the source expression has no
// "def" operand.
StringBuffer sbuff2 = new StringBuffer(str);
sbuff2.append(ib);
str = sbuff2.toString();

所以,如果在进行字符串相加时使用了多个表达式,那么一定要使用自己创建的StringBuffer来进行字符串连接。

3、反编译分析

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
* StringTuning: a set of small demo methods, each containing one string
* concatenation pattern built with the + operator and logging the result at
* debug level. The source is kept deliberately minimal so that the compiled
* bytecode of each method can be inspected pattern by pattern.
*
* @author crazyzh1984
* @version 1.0, 2008-5-22
*/
public class StringTuning {

// Shared commons-logging logger used by every demo method.
protected static final Log logger = LogFactory.getLog(StringTuning.class);

// Pattern 1: a string literal concatenated with an int literal (all operands are literals).
protected static void test1() {
String str = "abc" + 123;// abc123
logger.debug(str);
}

// Pattern 2: a non-empty string literal concatenated with an int local variable.
protected static void test2() {
int i = 123;
String str = "abc" + i;
logger.debug(str);
}

// Pattern 3: an empty string literal concatenated with an int local variable.
protected static void test3() {
int i = 123;
String str = "" + i;
logger.debug(str);
}

// Pattern 4: a single expression mixing two string literals and two int local variables.
protected static void test4() {
int ia = 123;
int ib = 456;
String str = "abc" + ia + "def" + ib;
logger.debug(str);
}

// Pattern 5: the concatenation is split across two statements; the second
// statement uses the result of the first as its left operand.
protected static void test5() {
int ia = 123;
int ib = 456;
String str1 = "abc" + ia;
str1 = str1 + ib;
logger.debug(str1);
}
}

反编译的结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
Compiled from "StringTuning.java"
public class StringTuning extends java.lang.Object{
protected static final org.apache.commons.logging.Log logger;

static java.lang.Class class$0;

static {};
Code:
0: getstatic #13; //Field class$0:Ljava/lang/Class;
3: dup
4: ifnonnull 32
7: pop
8: ldc #15; //String com.thunisoft.codereview1.StringTuning
10: invokestatic #17; //Method java/lang/Class.forName:(Ljava/lang/String;)Ljava/lang/Class;
13: dup
14: putstatic #13; //Field class$0:Ljava/lang/Class;
17: goto 32
20: new #23; //class java/lang/NoClassDefFoundError
23: dup_x1
24: swap
25: invokevirtual #25; //Method java/lang/Throwable.getMessage:()Ljava/lang/String;
28: invokespecial #31; //Method java/lang/NoClassDefFoundError."<init>":(Ljava/lang/String;)V
31: athrow
32: invokestatic #35; //Method org/apache/commons/logging/LogFactory.getLog:(Ljava/lang/Class;)Lorg/apache/commons/logging/Log;
35: putstatic #41; //Field logger:Lorg/apache/commons/logging/Log;
38: return
Exception table:
from to target type
8 13 20 Class java/lang/ClassNotFoundException

public com.thunisoft.codereview1.StringTuning();
Code:
0: aload_0
1: invokespecial #47; //Method java/lang/Object."<init>":()V
4: return

//函数1
protected static void test1();
Code:
0: ldc #52; //String abc123 注:字符串赋值为 "abc123"
2: astore_0
3: getstatic #41; //Field logger:Lorg/apache/commons/logging/Log;
6: aload_0
7: invokeinterface #54, 2; //InterfaceMethod org/apache/commons/logging/Log.debug:(Ljava/lang/Object;)V
12: return

//函数2
protected static void test2();
Code:
0: bipush 123
2: istore_0
3: new #63; //class java/lang/StringBuffer 注:创建了一个StringBuffer对象
6: dup
7: ldc #65; //String abc
9: invokespecial #67; //Method java/lang/StringBuffer."<init>":(Ljava/lang/String;)V
12: iload_0
13: invokevirtual #68; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
16: invokevirtual #72; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
19: astore_1
20: getstatic #41; //Field logger:Lorg/apache/commons/logging/Log;
23: aload_1
24: invokeinterface #54, 2; //InterfaceMethod org/apache/commons/logging/Log.debug:(Ljava/lang/Object;)V
29: return

//函数3
protected static void test3();
Code:
0: bipush 123
2: istore_0
3: new #63; //class java/lang/StringBuffer 注:创建了一个StringBuffer对象
6: dup
7: invokespecial #78; //Method java/lang/StringBuffer."<init>":()V
10: iload_0
11: invokevirtual #68; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
14: invokevirtual #72; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
17: astore_1
18: getstatic #41; //Field logger:Lorg/apache/commons/logging/Log;
21: aload_1
22: invokeinterface #54, 2; //InterfaceMethod org/apache/commons/logging/Log.debug:(Ljava/lang/Object;)V
27: return

//函数4
protected static void test4();
Code:
0: bipush 123
2: istore_0
3: sipush 456
6: istore_1
7: new #63; //class java/lang/StringBuffer 注:创建了一个StringBuffer对象
10: dup
11: ldc #65; //String abc
13: invokespecial #67; //Method java/lang/StringBuffer."<init>":(Ljava/lang/String;)V
16: iload_0
17: invokevirtual #68; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
20: ldc #80; //String def
22: invokevirtual #82; //Method java/lang/StringBuffer.append:(Ljava/lang/String;)Ljava/lang/StringBuffer;
25: iload_1
26: invokevirtual #68; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
29: invokevirtual #72; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
32: astore_2
33: getstatic #41; //Field logger:Lorg/apache/commons/logging/Log;
36: aload_2
37: invokeinterface #54, 2; //InterfaceMethod org/apache/commons/logging/Log.debug:(Ljava/lang/Object;)V
42: return

//函数5
protected static void test5();
Code:
0: bipush 123
2: istore_0
3: sipush 456
6: istore_1
7: new #63; //class java/lang/StringBuffer 注:创建了一个StringBuffer对象
10: dup
11: ldc #65; //String abc
13: invokespecial #67; //Method java/lang/StringBuffer."<init>":(Ljava/lang/String;)V
16: iload_0
17: invokevirtual #68; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
20: invokevirtual #72; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
23: astore_2
24: new #63; //class java/lang/StringBuffer 注:又创建了一个StringBuffer对象
27: dup
28: aload_2
29: invokestatic #88; //Method java/lang/String.valueOf:(Ljava/lang/Object;)Ljava/lang/String;
32: invokespecial #67; //Method java/lang/StringBuffer."<init>":(Ljava/lang/String;)V
35: iload_1
36: invokevirtual #68; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
39: invokevirtual #72; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
42: astore_2
43: getstatic #41; //Field logger:Lorg/apache/commons/logging/Log;
46: aload_2
47: invokeinterface #54, 2; //InterfaceMethod org/apache/commons/logging/Log.debug:(Ljava/lang/Object;)V
52: return

}

对于被定义成final的原始数据类型的常量,会被静态编译到执行的代码中。 例:

1
2
3
4
5
6
7
8
9
// Class-level static final int initialized with a literal; used in the first concatenation below.
protected static final int CODE_TEST = 888;

// Demo: concatenates "abc" first with the static final constant and then with a
// final local, each followed by the non-final local ia, and logs the last result.
protected static void test6() {
int ia = 123;
final int ib = 999;
String str = "abc" + CODE_TEST + ia;
// The first value of str is overwritten here without having been read.
str = "abc" + ib + ia;
logger.debug(str);
}

编译后的执行代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
protected static void test6();
Code:
0: bipush 123
2: istore_0
3: sipush 999
6: istore_1
7: new #67; //class java/lang/StringBuffer
10: dup
11: ldc #99; //String abc888 注:"abc" + CODE_TEST 被静态编译为 "abc888"
13: invokespecial #71; //Method java/lang/StringBuffer."<init>":(Ljava/lang/String;)V
16: iload_0
17: invokevirtual #72; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
20: invokevirtual #76; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
23: astore_2
24: new #67; //class java/lang/StringBuffer
27: dup
28: ldc #101; //String abc999 注:"abc" + ib 被静态编译为 "abc999"
30: invokespecial #71; //Method java/lang/StringBuffer."<init>":(Ljava/lang/String;)V
33: iload_0
34: invokevirtual #72; //Method java/lang/StringBuffer.append:(I)Ljava/lang/StringBuffer;
37: invokevirtual #76; //Method java/lang/StringBuffer.toString:()Ljava/lang/String;
40: astore_2
41: getstatic #45; //Field logger:Lorg/apache/commons/logging/Log;
44: aload_2
45: invokeinterface #58, 2; //InterfaceMethod org/apache/commons/logging/Log.debug:(Ljava/lang/Object;)V
50: return

4、测试

假定每秒要处理 count 次字符串相加运算,则测试代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Times two ways of appending the integers 0..count-1 to the prefix "str=":
 * repeated String + inside a loop, and a single reused StringBuffer.
 * The elapsed wall-clock milliseconds of each approach are logged at info level.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final int count = 10000;

    // Approach 1: build the result with the + operator on every iteration.
    final long firstBegin = System.currentTimeMillis();
    String str = "str=";
    int n = 0;
    while (n < count) {
        str = str + n;
        n++;
    }
    final long firstElapse = System.currentTimeMillis() - firstBegin;
    logger.info("elapse 1: " + firstElapse);

    // Approach 2: append into one StringBuffer and convert to a String once at the end.
    final long secondBegin = System.currentTimeMillis();
    StringBuffer buffer = new StringBuffer("str=");
    int m = 0;
    while (m < count) {
        buffer.append(m);
        m++;
    }
    // The conversion is part of the timed section.
    str = buffer.toString();
    final long secondElapse = System.currentTimeMillis() - secondBegin;
    logger.info("elapse 2: " + secondElapse);
}

当 count=10000 时,运行结果为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
2008-5-27 19:50:24 StringTuning main
信息: elapse 1: 968
2008-5-27 19:50:24 StringTuning main
信息: elapse 2: 0

2008-5-27 19:50:36 StringTuning main
信息: elapse 1: 984
2008-5-27 19:50:36 StringTuning main
信息: elapse 2: 0

2008-5-27 19:50:43 StringTuning main
信息: elapse 1: 968
2008-5-27 19:50:43 StringTuning main
信息: elapse 2: 15

当 count=20000 时,运行结果为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
2008-5-27 19:48:54 StringTuning main
信息: elapse 1: 9281
2008-5-27 19:48:54 StringTuning main
信息: elapse 2: 0

2008-5-27 19:51:04 StringTuning main
信息: elapse 1: 9218
2008-5-27 19:51:04 StringTuning main
信息: elapse 2: 0

2008-5-27 19:51:26 StringTuning main
信息: elapse 1: 9281
2008-5-27 19:51:26 StringTuning main
信息: elapse 2: 16

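我们发现,耗费的时间比运算量增长得更快(count 翻倍,方式1的耗时约为原来的10倍),创建每个StringBuffer的成本急剧增加。
为什么呢?一方面,每次“+”操作都要把已有的全部字符先复制进新的StringBuffer,再复制进新的String,字符串越长单次复制量越大,总复制量随 count 近似按平方增长;另一方面是内存,频繁的对象创建将导致JVM频繁的GC。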
我们可以估算一下2种方式创建的对象个数。

方式1创建的对象个数为:
count*2 (每次“+”操作创建一个StringBuffer和一个String对象)

方式2创建的对象个数为:
log2(count)+1 (因为StringBuffer的数组扩容每次乘2)

实际上,扩容的次数跟StringBuffer的初始大小以及要操作的字符串大小有关。