Context Navigation

source: trunk/third/gcc/unroll.c @ 11288

Visit:

Revision 11288, 121.9 KB checked in by ghudson, 26 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r11287, which included commits to RCS files with non-trunk default branches.

Line
1	/* Try to unroll loops, and split induction variables.
2	Copyright (C) 1992, 93, 94, 95, 97, 1998 Free Software Foundation, Inc.
3	Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
4
5	This file is part of GNU CC.
6
7	GNU CC is free software; you can redistribute it and/or modify
8	it under the terms of the GNU General Public License as published by
9	the Free Software Foundation; either version 2, or (at your option)
10	any later version.
11
12	GNU CC is distributed in the hope that it will be useful,
13	but WITHOUT ANY WARRANTY; without even the implied warranty of
14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	GNU General Public License for more details.
16
17	You should have received a copy of the GNU General Public License
18	along with GNU CC; see the file COPYING. If not, write to
19	the Free Software Foundation, 59 Temple Place - Suite 330,
20	Boston, MA 02111-1307, USA. */
21
22	/* Try to unroll a loop, and split induction variables.
23
24	Loops for which the number of iterations can be calculated exactly are
25	handled specially. If the number of iterations times the insn_count is
26	less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
27	Otherwise, we try to unroll the loop a number of times modulo the number
28	of iterations, so that only one exit test will be needed. It is unrolled
29	a number of times approximately equal to MAX_UNROLLED_INSNS divided by
30	the insn count.
31
32	Otherwise, if the number of iterations can be calculated exactly at
33	run time, and the loop is always entered at the top, then we try to
34	precondition the loop. That is, at run time, calculate how many times
35	the loop will execute, and then execute the loop body a few times so
36	that the remaining iterations will be some multiple of 4 (or 2 if the
37	loop is large). Then fall through to a loop unrolled 4 (or 2) times,
38	with only one exit test needed at the end of the loop.
39
40	Otherwise, if the number of iterations can not be calculated exactly,
41	not even at run time, then we still unroll the loop a number of times
42	approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
43	but there must be an exit test after each copy of the loop body.
44
45	For each induction variable, which is dead outside the loop (replaceable)
46	or for which we can easily calculate the final value, if we can easily
47	calculate its value at each place where it is set as a function of the
48	current loop unroll count and the variable's value at loop entry, then
49	the induction variable is split into `N' different variables, one for
50	each copy of the loop body. One variable is live across the backward
51	branch, and the others are all calculated as a function of this variable.
52	This helps eliminate data dependencies, and leads to further opportunities
53	for cse. */
54
55	/* Possible improvements follow: */
56
57	/* ??? Add an extra pass somewhere to determine whether unrolling will
58	give any benefit. E.g. after generating all unrolled insns, compute the
59	cost of all insns and compare against cost of insns in rolled loop.
60
61	- On traditional architectures, unrolling a non-constant bound loop
62	is a win if there is a giv whose only use is in memory addresses, the
63	memory addresses can be split, and hence giv increments can be
64	eliminated.
65	- It is also a win if the loop is executed many times, and preconditioning
66	can be performed for the loop.
67	Add code to check for these and similar cases. */
68
69	/* ??? Improve control of which loops get unrolled. Could use profiling
70	info to only unroll the most commonly executed loops. Perhaps have
71	a user-specifiable option to control the amount of code expansion,
72	or the percent of loops to consider for unrolling. Etc. */
73
74	/* ??? Look at the register copies inside the loop to see if they form a
75	simple permutation. If so, iterate the permutation until it gets back to
76	the start state. This is how many times we should unroll the loop, for
77	best results, because then all register copies can be eliminated.
78	For example, the lisp nreverse function should be unrolled 3 times
79	while (this)
80	{
81	next = this->cdr;
82	this->cdr = prev;
83	prev = this;
84	this = next;
85	}
86
87	??? The number of times to unroll the loop may also be based on data
88	references in the loop. For example, if we have a loop that references
89	x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
90
91	/* ??? Add some simple linear equation solving capability so that we can
92	determine the number of loop iterations for more complex loops.
93	For example, consider this loop from gdb
94	#define SWAP_TARGET_AND_HOST(buffer,len)
95	{
96	char tmp;
97	char *p = (char *) buffer;
98	char *q = ((char *) buffer) + len - 1;
99	int iterations = (len + 1) >> 1;
100	int i;
101	for (p; p < q; p++, q--;)
102	{
103	tmp = *q;
104	*q = *p;
105	*p = tmp;
106	}
107	}
108	Note that:
109	start value = p = &buffer + current_iteration
110	end value = q = &buffer + len - 1 - current_iteration
111	Given the loop exit test of "p < q", then there must be "q - p" iterations,
112	set equal to zero and solve for number of iterations:
113	q - p = len - 1 - 2*current_iteration = 0
114	current_iteration = (len - 1) / 2
115	Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
116	iterations of this loop. */
117
118	/* ??? Currently, no labels are marked as loop invariant when doing loop
119	unrolling. This is because an insn inside the loop, that loads the address
120	of a label inside the loop into a register, could be moved outside the loop
121	by the invariant code motion pass if labels were invariant. If the loop
122	is subsequently unrolled, the code will be wrong because each unrolled
123	body of the loop will use the same address, whereas each actually needs a
124	different address. A case where this happens is when a loop containing
125	a switch statement is unrolled.
126
127	It would be better to let labels be considered invariant. When we
128	unroll loops here, check to see if any insns using a label local to the
129	loop were moved before the loop. If so, then correct the problem, by
130	moving the insn back into the loop, or perhaps replicate the insn before
131	the loop, one copy for each time the loop is unrolled. */
132
133	/* The prime factors looked for when trying to unroll a loop by some
134	number which is modulo the total number of iterations. Just checking
135	for these 4 prime factors will find at least one factor for 75% of
136	all numbers theoretically. Practically speaking, this will succeed
137	almost all of the time since loops are generally a multiple of 2
138	and/or 5. */
139
140	#define NUM_FACTORS 4
141
142	struct _factor { int factor, count; } factors[NUM_FACTORS]	/* count is reset to zero and recomputed for each loop in unroll_loop.  */
143	= { {2, 0}, {3, 0}, {5, 0}, {7, 0}};	/* Each entry is {prime, number of times that prime divides loop_n_iterations}.  */
144
145	/* Describes the different types of loop unrolling performed. */
146
147	enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };	/* COMPLETELY: unroll once per iteration; MODULO: unroll by a factor of the iteration count; NAIVE: size-based unroll that copies the compare and branch.  */
148
149	#include "config.h"
150	#include <stdio.h>
151	#include "rtl.h"
152	#include "insn-config.h"
153	#include "integrate.h"
154	#include "regs.h"
155	#include "recog.h"
156	#include "flags.h"
157	#include "expr.h"
158	#include "loop.h"
159
160	/* This controls which loops are unrolled, and by how much we unroll
161	them. */
162
163	#ifndef MAX_UNROLLED_INSNS
164	#define MAX_UNROLLED_INSNS 100	/* Default value, used only when not already defined.  */
165	#endif
166
167	/* Indexed by register number, if non-zero, then it contains a pointer
168	to a struct induction for a DEST_REG giv which has been combined with
169	one or more address givs. This is needed because whenever such a DEST_REG
170	giv is modified, we must modify the value of all split address givs
171	that were combined with this DEST_REG giv. */
172
173	static struct induction **addr_combined_regs;	/* Allocated with maxregnum entries and zeroed in unroll_loop.  */
174
175	/* Indexed by register number, if this is a splittable induction variable,
176	then this will hold the current value of the register, which depends on the
177	iteration number. */
178
179	static rtx *splittable_regs;	/* Allocated with maxregnum entries and zeroed in unroll_loop.  */
180
181	/* Indexed by register number, if this is a splittable induction variable,
182	then this will hold the number of instructions in the loop that modify
183	the induction variable. Used to ensure that only the last insn modifying
184	a split iv will update the original iv of the dest. */
185
186	static int *splittable_regs_updates;	/* Allocated with maxregnum entries and zeroed in unroll_loop.  */
187
188	/* Values describing the current loop's iteration variable. These are set up
189	by loop_iterations, and used by precondition_loop_p. */
190
191	static rtx loop_iteration_var;	/* NOTE(review): none of these five are referenced in the visible part of unroll_loop; confirm their exact meaning in loop_iterations.  */
192	static rtx loop_initial_value;
193	static rtx loop_increment;
194	static rtx loop_final_value;
195	static enum rtx_code loop_comparison_code;
196
197	/* Forward declarations. */
198
199	static void init_reg_map PROTO((struct inline_remap *, int));
200	static int precondition_loop_p PROTO((rtx *, rtx *, rtx *, rtx, rtx));	/* The three rtx * arguments are outputs: unroll_loop passes the addresses of its initial_value, final_value and increment.  */
201	static rtx calculate_giv_inc PROTO((rtx, rtx, int));
202	static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
203	static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
204	static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
205	enum unroll_types, rtx, rtx, rtx, rtx));
206	static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
207	static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
208	static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
209	static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
210	rtx, rtx, rtx, int));
211	static int reg_dead_after_loop PROTO((rtx, rtx, rtx));
212	static rtx fold_rtx_mult_add PROTO((rtx, rtx, rtx, enum machine_mode));
213	static rtx remap_split_bivs PROTO((rtx));
214
215	/* Try to unroll one loop and split induction variables in the loop.
216
217	The loop is described by the arguments LOOP_END, INSN_COUNT, and
218	LOOP_START. END_INSERT_BEFORE indicates where insns should be added
219	which need to be executed when the loop falls through. STRENGTH_REDUCTION_P
220	indicates whether information generated in the strength reduction pass
221	is available.
222
223	This function is intended to be called from within `strength_reduce'
224	in loop.c. */
225
226	void
227	unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
228	strength_reduce_p)
229	rtx loop_end;
230	int insn_count;
231	rtx loop_start;
232	rtx end_insert_before;
233	int strength_reduce_p;
234	{
235	int i, j, temp;
236	int unroll_number = 1;
237	rtx copy_start, copy_end;
238	rtx insn, copy, sequence, pattern, tem;
239	int max_labelno, max_insnno;
240	rtx insert_before;
241	struct inline_remap *map;
242	char *local_label;
243	char *local_regno;
244	int maxregnum;
245	int new_maxregnum;
246	rtx exit_label = 0;
247	rtx start_label;
248	struct iv_class *bl;
249	int splitting_not_safe = 0;
250	enum unroll_types unroll_type;
251	int loop_preconditioned = 0;
252	rtx safety_label;
253	/* This points to the last real insn in the loop, which should be either
254	a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
255	jumps). */
256	rtx last_loop_insn;
257
258	/* Don't bother unrolling huge loops. Since the minimum factor is
259	two, loops greater than one half of MAX_UNROLLED_INSNS will never
260	be unrolled. */
261	if (insn_count > MAX_UNROLLED_INSNS / 2)
262	{
263	if (loop_dump_stream)
264	fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
265	return;
266	}
267
268	/* When emitting debugger info, we can't unroll loops with unequal numbers
269	of block_beg and block_end notes, because that would unbalance the block
270	structure of the function. This can happen as a result of the
271	"if (foo) bar; else break;" optimization in jump.c. */
272	/* ??? Gcc has a general policy that -g is never supposed to change the code
273	that the compiler emits, so we must disable this optimization always,
274	even if debug info is not being output. This is rare, so this should
275	not be a significant performance problem. */
276
277	if (1 /* write_symbols != NO_DEBUG */)
278	{
279	int block_begins = 0;
280	int block_ends = 0;
281
282	for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
283	{
284	if (GET_CODE (insn) == NOTE)
285	{
286	if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
287	block_begins++;
288	else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
289	block_ends++;
290	}
291	}
292
293	if (block_begins != block_ends)
294	{
295	if (loop_dump_stream)
296	fprintf (loop_dump_stream,
297	"Unrolling failure: Unbalanced block notes.\n");
298	return;
299	}
300	}
301
302	/* Determine type of unroll to perform. Depends on the number of iterations
303	and the size of the loop. */
304
305	/* If there is no strength reduce info, then set loop_n_iterations to zero.
306	This can happen if strength_reduce can't find any bivs in the loop.
307	A value of zero indicates that the number of iterations could not be
308	calculated. */
309
310	if (! strength_reduce_p)
311	loop_n_iterations = 0;
312
313	if (loop_dump_stream && loop_n_iterations > 0)
314	fprintf (loop_dump_stream,
315	"Loop unrolling: %d iterations.\n", loop_n_iterations);
316
317	/* Find and save a pointer to the last nonnote insn in the loop. */
318
319	last_loop_insn = prev_nonnote_insn (loop_end);
320
321	/* Calculate how many times to unroll the loop. Indicate whether or
322	not the loop is being completely unrolled. */
323
324	if (loop_n_iterations == 1)
325	{
326	/* If number of iterations is exactly 1, then eliminate the compare and
327	branch at the end of the loop since they will never be taken.
328	Then return, since no other action is needed here. */
329
330	/* If the last instruction is not a BARRIER or a JUMP_INSN, then
331	don't do anything. */
332
333	if (GET_CODE (last_loop_insn) == BARRIER)
334	{
335	/* Delete the jump insn. This will delete the barrier also. */
336	delete_insn (PREV_INSN (last_loop_insn));
337	}
338	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
339	{
340	#ifdef HAVE_cc0
341	/* The immediately preceding insn is a compare which must be
342	deleted. */
343	delete_insn (last_loop_insn);
344	delete_insn (PREV_INSN (last_loop_insn));
345	#else
346	/* The immediately preceding insn may not be the compare, so don't
347	delete it. */
348	delete_insn (last_loop_insn);
349	#endif
350	}
351	return;
352	}
353	else if (loop_n_iterations > 0
354	&& loop_n_iterations * insn_count < MAX_UNROLLED_INSNS)
355	{
356	unroll_number = loop_n_iterations;
357	unroll_type = UNROLL_COMPLETELY;
358	}
359	else if (loop_n_iterations > 0)
360	{
361	/* Try to factor the number of iterations. Don't bother with the
362	general case, only using 2, 3, 5, and 7 will get 75% of all
363	numbers theoretically, and almost all in practice. */
364
365	for (i = 0; i < NUM_FACTORS; i++)
366	factors[i].count = 0;
367
368	temp = loop_n_iterations;
369	for (i = NUM_FACTORS - 1; i >= 0; i--)
370	while (temp % factors[i].factor == 0)
371	{
372	factors[i].count++;
373	temp = temp / factors[i].factor;
374	}
375
376	/* Start with the larger factors first so that we generally
377	get lots of unrolling. */
378
379	unroll_number = 1;
380	temp = insn_count;
381	for (i = 3; i >= 0; i--)
382	while (factors[i].count--)
383	{
384	if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
385	{
386	unroll_number *= factors[i].factor;
387	temp *= factors[i].factor;
388	}
389	else
390	break;
391	}
392
393	/* If we couldn't find any factors, then unroll as in the normal
394	case. */
395	if (unroll_number == 1)
396	{
397	if (loop_dump_stream)
398	fprintf (loop_dump_stream,
399	"Loop unrolling: No factors found.\n");
400	}
401	else
402	unroll_type = UNROLL_MODULO;
403	}
404
405
406	/* Default case, calculate number of times to unroll loop based on its
407	size. */
408	if (unroll_number == 1)
409	{
410	if (8 * insn_count < MAX_UNROLLED_INSNS)
411	unroll_number = 8;
412	else if (4 * insn_count < MAX_UNROLLED_INSNS)
413	unroll_number = 4;
414	else
415	unroll_number = 2;
416
417	unroll_type = UNROLL_NAIVE;
418	}
419
420	/* Now we know how many times to unroll the loop. */
421
422	if (loop_dump_stream)
423	fprintf (loop_dump_stream,
424	"Unrolling loop %d times.\n", unroll_number);
425
426
427	if (unroll_type == UNROLL_COMPLETELY \|\| unroll_type == UNROLL_MODULO)
428	{
429	/* Loops of these types should never start with a jump down to
430	the exit condition test. For now, check for this case just to
431	be sure. UNROLL_NAIVE loops can be of this form, this case is
432	handled below. */
433	insn = loop_start;
434	while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
435	insn = NEXT_INSN (insn);
436	if (GET_CODE (insn) == JUMP_INSN)
437	abort ();
438	}
439
440	if (unroll_type == UNROLL_COMPLETELY)
441	{
442	/* Completely unrolling the loop: Delete the compare and branch at
443	the end (the last two instructions). This delete must be done at the
444	very end of loop unrolling, to avoid problems with calls to
445	back_branch_in_range_p, which is called by find_splittable_regs.
446	All increments of splittable bivs/givs are changed to load constant
447	instructions. */
448
449	copy_start = loop_start;
450
451	/* Set insert_before to the instruction immediately after the JUMP_INSN
452	(or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
453	the loop will be correctly handled by copy_loop_body. */
454	insert_before = NEXT_INSN (last_loop_insn);
455
456	/* Set copy_end to the insn before the jump at the end of the loop. */
457	if (GET_CODE (last_loop_insn) == BARRIER)
458	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
459	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
460	{
461	#ifdef HAVE_cc0
462	/* The instruction immediately before the JUMP_INSN is a compare
463	instruction which we do not want to copy. */
464	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
465	#else
466	/* The instruction immediately before the JUMP_INSN may not be the
467	compare, so we must copy it. */
468	copy_end = PREV_INSN (last_loop_insn);
469	#endif
470	}
471	else
472	{
473	/* We currently can't unroll a loop if it doesn't end with a
474	JUMP_INSN. There would need to be a mechanism that recognizes
475	this case, and then inserts a jump after each loop body, which
476	jumps to after the last loop body. */
477	if (loop_dump_stream)
478	fprintf (loop_dump_stream,
479	"Unrolling failure: loop does not end with a JUMP_INSN.\n");
480	return;
481	}
482	}
483	else if (unroll_type == UNROLL_MODULO)
484	{
485	/* Partially unrolling the loop: The compare and branch at the end
486	(the last two instructions) must remain. Don't copy the compare
487	and branch instructions at the end of the loop. Insert the unrolled
488	code immediately before the compare/branch at the end so that the
489	code will fall through to them as before. */
490
491	copy_start = loop_start;
492
493	/* Set insert_before to the jump insn at the end of the loop.
494	Set copy_end to before the jump insn at the end of the loop. */
495	if (GET_CODE (last_loop_insn) == BARRIER)
496	{
497	insert_before = PREV_INSN (last_loop_insn);
498	copy_end = PREV_INSN (insert_before);
499	}
500	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
501	{
502	#ifdef HAVE_cc0
503	/* The instruction immediately before the JUMP_INSN is a compare
504	instruction which we do not want to copy or delete. */
505	insert_before = PREV_INSN (last_loop_insn);
506	copy_end = PREV_INSN (insert_before);
507	#else
508	/* The instruction immediately before the JUMP_INSN may not be the
509	compare, so we must copy it. */
510	insert_before = last_loop_insn;
511	copy_end = PREV_INSN (last_loop_insn);
512	#endif
513	}
514	else
515	{
516	/* We currently can't unroll a loop if it doesn't end with a
517	JUMP_INSN. There would need to be a mechanism that recognizes
518	this case, and then inserts a jump after each loop body, which
519	jumps to after the last loop body. */
520	if (loop_dump_stream)
521	fprintf (loop_dump_stream,
522	"Unrolling failure: loop does not end with a JUMP_INSN.\n");
523	return;
524	}
525	}
526	else
527	{
528	/* Normal case: Must copy the compare and branch instructions at the
529	end of the loop. */
530
531	if (GET_CODE (last_loop_insn) == BARRIER)
532	{
533	/* Loop ends with an unconditional jump and a barrier.
534	Handle this like above, don't copy jump and barrier.
535	This is not strictly necessary, but doing so prevents generating
536	unconditional jumps to an immediately following label.
537
538	This will be corrected below if the target of this jump is
539	not the start_label. */
540
541	insert_before = PREV_INSN (last_loop_insn);
542	copy_end = PREV_INSN (insert_before);
543	}
544	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
545	{
546	/* Set insert_before to immediately after the JUMP_INSN, so that
547	NOTEs at the end of the loop will be correctly handled by
548	copy_loop_body. */
549	insert_before = NEXT_INSN (last_loop_insn);
550	copy_end = last_loop_insn;
551	}
552	else
553	{
554	/* We currently can't unroll a loop if it doesn't end with a
555	JUMP_INSN. There would need to be a mechanism that recognizes
556	this case, and then inserts a jump after each loop body, which
557	jumps to after the last loop body. */
558	if (loop_dump_stream)
559	fprintf (loop_dump_stream,
560	"Unrolling failure: loop does not end with a JUMP_INSN.\n");
561	return;
562	}
563
564	/* If copying exit test branches because they can not be eliminated,
565	then must convert the fall through case of the branch to a jump past
566	the end of the loop. Create a label to emit after the loop and save
567	it for later use. Do not use the label after the loop, if any, since
568	it might be used by insns outside the loop, or there might be insns
569	added before it later by final_[bg]iv_value which must be after
570	the real exit label. */
571	exit_label = gen_label_rtx ();
572
573	insn = loop_start;
574	while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
575	insn = NEXT_INSN (insn);
576
577	if (GET_CODE (insn) == JUMP_INSN)
578	{
579	/* The loop starts with a jump down to the exit condition test.
580	Start copying the loop after the barrier following this
581	jump insn. */
582	copy_start = NEXT_INSN (insn);
583
584	/* Splitting induction variables doesn't work when the loop is
585	entered via a jump to the bottom, because then we end up doing
586	a comparison against a new register for a split variable, but
587	we did not execute the set insn for the new register because
588	it was skipped over. */
589	splitting_not_safe = 1;
590	if (loop_dump_stream)
591	fprintf (loop_dump_stream,
592	"Splitting not safe, because loop not entered at top.\n");
593	}
594	else
595	copy_start = loop_start;
596	}
597
598	/* This should always be the first label in the loop. */
599	start_label = NEXT_INSN (copy_start);
600	/* There may be a line number note and/or a loop continue note here. */
601	while (GET_CODE (start_label) == NOTE)
602	start_label = NEXT_INSN (start_label);
603	if (GET_CODE (start_label) != CODE_LABEL)
604	{
605	/* This can happen as a result of jump threading. If the first insns in
606	the loop test the same condition as the loop's backward jump, or the
607	opposite condition, then the backward jump will be modified to point
608	to elsewhere, and the loop's start label is deleted.
609
610	This case currently can not be handled by the loop unrolling code. */
611
612	if (loop_dump_stream)
613	fprintf (loop_dump_stream,
614	"Unrolling failure: unknown insns between BEG note and loop label.\n");
615	return;
616	}
617	if (LABEL_NAME (start_label))
618	{
619	/* The jump optimization pass must have combined the original start label
620	with a named label for a goto. We can't unroll this case because
621	jumps which go to the named label must be handled differently than
622	jumps to the loop start, and it is impossible to differentiate them
623	in this case. */
624	if (loop_dump_stream)
625	fprintf (loop_dump_stream,
626	"Unrolling failure: loop start label is gone\n");
627	return;
628	}
629
630	if (unroll_type == UNROLL_NAIVE
631	&& GET_CODE (last_loop_insn) == BARRIER
632	&& start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
633	{
634	/* In this case, we must copy the jump and barrier, because they will
635	not be converted to jumps to an immediately following label. */
636
637	insert_before = NEXT_INSN (last_loop_insn);
638	copy_end = last_loop_insn;
639	}
640
641	if (unroll_type == UNROLL_NAIVE
642	&& GET_CODE (last_loop_insn) == JUMP_INSN
643	&& start_label != JUMP_LABEL (last_loop_insn))
644	{
645	/* ??? The loop ends with a conditional branch that does not branch back
646	to the loop start label. In this case, we must emit an unconditional
647	branch to the loop exit after emitting the final branch.
648	copy_loop_body does not have support for this currently, so we
649	give up. It doesn't seem worthwhile to unroll anyways since
650	unrolling would increase the number of branch instructions
651	executed. */
652	if (loop_dump_stream)
653	fprintf (loop_dump_stream,
654	"Unrolling failure: final conditional branch not to loop start\n");
655	return;
656	}
657
658	/* Allocate a translation table for the labels and insn numbers.
659	They will be filled in as we copy the insns in the loop. */
660
661	max_labelno = max_label_num ();
662	max_insnno = get_max_uid ();
663
664	map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
665
666	map->integrating = 0;
667
668	/* Allocate the label map. */
669
670	if (max_labelno > 0)
671	{
672	map->label_map = (rtx ) alloca (max_labelno sizeof (rtx));
673
674	local_label = (char *) alloca (max_labelno);
675	bzero (local_label, max_labelno);
676	}
677	else
678	map->label_map = 0;
679
680	/* Search the loop and mark all local labels, i.e. the ones which have to
681	be distinct labels when copied. For all labels which might be
682	non-local, set their label_map entries to point to themselves.
683	If they happen to be local their label_map entries will be overwritten
684	before the loop body is copied. The label_map entries for local labels
685	will be set to a different value each time the loop body is copied. */
686
687	for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
688	{
689	if (GET_CODE (insn) == CODE_LABEL)
690	local_label[CODE_LABEL_NUMBER (insn)] = 1;
691	else if (GET_CODE (insn) == JUMP_INSN)
692	{
693	if (JUMP_LABEL (insn))
694	set_label_in_map (map,
695	CODE_LABEL_NUMBER (JUMP_LABEL (insn)),
696	JUMP_LABEL (insn));
697	else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
698	\|\| GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
699	{
700	rtx pat = PATTERN (insn);
701	int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
702	int len = XVECLEN (pat, diff_vec_p);
703	rtx label;
704
705	for (i = 0; i < len; i++)
706	{
707	label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
708	set_label_in_map (map,
709	CODE_LABEL_NUMBER (label),
710	label);
711	}
712	}
713	}
714	}
715
716	/* Allocate space for the insn map. */
717
718	map->insn_map = (rtx ) alloca (max_insnno sizeof (rtx));
719
720	/* Set this to zero, to indicate that we are doing loop unrolling,
721	not function inlining. */
722	map->inline_target = 0;
723
724	/* The register and constant maps depend on the number of registers
725	present, so the final maps can't be created until after
726	find_splittable_regs is called. However, they are needed for
727	preconditioning, so we create temporary maps when preconditioning
728	is performed. */
729
730	/* The preconditioning code may allocate two new pseudo registers. */
731	maxregnum = max_reg_num ();
732
733	/* Allocate and zero out the splittable_regs and addr_combined_regs
734	arrays. These must be zeroed here because they will be used if
735	loop preconditioning is performed, and must be zero for that case.
736
737	It is safe to do this here, since the extra registers created by the
738	preconditioning code and find_splittable_regs will never be used
739	to access the splittable_regs[] and addr_combined_regs[] arrays. */
740
741	splittable_regs = (rtx ) alloca (maxregnum sizeof (rtx));
742	bzero ((char ) splittable_regs, maxregnum sizeof (rtx));
743	splittable_regs_updates = (int ) alloca (maxregnum sizeof (int));
744	bzero ((char ) splittable_regs_updates, maxregnum sizeof (int));
745	addr_combined_regs
746	= (struct induction *) alloca (maxregnum sizeof (struct induction *));
747	bzero ((char ) addr_combined_regs, maxregnum sizeof (struct induction *));
748	/* We must limit it to max_reg_before_loop, because only these pseudo
749	registers have valid regno_first_uid info. Any register created after
750	that is unlikely to be local to the loop anyways. */
751	local_regno = (char *) alloca (max_reg_before_loop);
752	bzero (local_regno, max_reg_before_loop);
753
754	/* Mark all local registers, i.e. the ones which are referenced only
755	inside the loop. */
756	if (INSN_UID (copy_end) < max_uid_for_loop)
757	{
758	int copy_start_luid = INSN_LUID (copy_start);
759	int copy_end_luid = INSN_LUID (copy_end);
760
761	/* If a register is used in the jump insn, we must not duplicate it
762	since it will also be used outside the loop. */
763	if (GET_CODE (copy_end) == JUMP_INSN)
764	copy_end_luid--;
765	/* If copy_start points to the NOTE that starts the loop, then we must
766	use the next luid, because invariant pseudo-regs moved out of the loop
767	have their lifetimes modified to start here, but they are not safe
768	to duplicate. */
769	if (copy_start == loop_start)
770	copy_start_luid++;
771
772	/* If a pseudo's lifetime is entirely contained within this loop, then we
773	can use a different pseudo in each unrolled copy of the loop. This
774	results in better code. */
775	for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; ++j)
776	if (REGNO_FIRST_UID (j) > 0 && REGNO_FIRST_UID (j) <= max_uid_for_loop
777	&& uid_luid[REGNO_FIRST_UID (j)] >= copy_start_luid
778	&& REGNO_LAST_UID (j) > 0 && REGNO_LAST_UID (j) <= max_uid_for_loop
779	&& uid_luid[REGNO_LAST_UID (j)] <= copy_end_luid)
780	{
781	/* However, we must also check for loop-carried dependencies.
782	If the value the pseudo has at the end of iteration X is
783	used by iteration X+1, then we can not use a different pseudo
784	for each unrolled copy of the loop. */
785	/* A pseudo is safe if regno_first_uid is a set, and this
786	set dominates all instructions from regno_first_uid to
787	regno_last_uid. */
788	/* ??? This check is simplistic. We would get better code if
789	this check was more sophisticated. */
790	if (set_dominates_use (j, REGNO_FIRST_UID (j), REGNO_LAST_UID (j),
791	copy_start, copy_end))
792	local_regno[j] = 1;
793
794	if (loop_dump_stream)
795	{
796	if (local_regno[j])
797	fprintf (loop_dump_stream, "Marked reg %d as local\n", j);
798	else
799	fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
800	j);
801	}
802	}
803	}
804
805	/* If this loop requires exit tests when unrolled, check to see if we
806	can precondition the loop so as to make the exit tests unnecessary.
807	Just like variable splitting, this is not safe if the loop is entered
808	via a jump to the bottom. Also, can not do this if no strength
809	reduce info, because precondition_loop_p uses this info. */
810
811	/* Must copy the loop body for preconditioning before the following
812	find_splittable_regs call since that will emit insns which need to
813	be after the preconditioned loop copies, but immediately before the
814	unrolled loop copies. */
815
816	/* Also, it is not safe to split induction variables for the preconditioned
817	copies of the loop body. If we split induction variables, then the code
818	assumes that each induction variable can be represented as a function
819	of its initial value and the loop iteration number. This is not true
820	in this case, because the last preconditioned copy of the loop body
821	could be any iteration from the first up to the `unroll_number-1'th,
822	depending on the initial value of the iteration variable. Therefore
823	we can not split induction variables here, because we can not calculate
824	their value. Hence, this code must occur before find_splittable_regs
825	is called. */
826
827	if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
828	{
829	rtx initial_value, final_value, increment;
830
831	if (precondition_loop_p (&initial_value, &final_value, &increment,
832	loop_start, loop_end))
833	{
834	register rtx diff, temp;
835	enum machine_mode mode;
836	rtx *labels;
837	int abs_inc, neg_inc;
838
839	map->reg_map = (rtx ) alloca (maxregnum sizeof (rtx));
840
841	map->const_equiv_map = (rtx ) alloca (maxregnum sizeof (rtx));
842	map->const_age_map = (unsigned *) alloca (maxregnum
843	* sizeof (unsigned));
844	map->const_equiv_map_size = maxregnum;
845	global_const_equiv_map = map->const_equiv_map;
846	global_const_equiv_map_size = maxregnum;
847
848	init_reg_map (map, maxregnum);
849
850	/* Limit loop unrolling to 4, since this will make 7 copies of
851	the loop body. */
852	if (unroll_number > 4)
853	unroll_number = 4;
854
855	/* Save the absolute value of the increment, and also whether or
856	not it is negative. */
857	neg_inc = 0;
858	abs_inc = INTVAL (increment);
859	if (abs_inc < 0)
860	{
861	abs_inc = - abs_inc;
862	neg_inc = 1;
863	}
864
865	start_sequence ();
866
867	/* Decide what mode to do these calculations in. Choose the larger
868	of final_value's mode and initial_value's mode, or a full-word if
869	both are constants. */
870	mode = GET_MODE (final_value);
871	if (mode == VOIDmode)
872	{
873	mode = GET_MODE (initial_value);
874	if (mode == VOIDmode)
875	mode = word_mode;
876	}
877	else if (mode != GET_MODE (initial_value)
878	&& (GET_MODE_SIZE (mode)
879	< GET_MODE_SIZE (GET_MODE (initial_value))))
880	mode = GET_MODE (initial_value);
881
882	/* Calculate the difference between the final and initial values.
883	Final value may be a (plus (reg x) (const_int 1)) rtx.
884	Let the following cse pass simplify this if initial value is
885	a constant.
886
887	We must copy the final and initial values here to avoid
888	improperly shared rtl. */
889
890	diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
891	copy_rtx (initial_value), NULL_RTX, 0,
892	OPTAB_LIB_WIDEN);
893
894	/* Now calculate (diff % (unroll * abs (increment))) by using an
895	and instruction. */
896	diff = expand_binop (GET_MODE (diff), and_optab, diff,
897	GEN_INT (unroll_number * abs_inc - 1),
898	NULL_RTX, 0, OPTAB_LIB_WIDEN);
899
900	/* Now emit a sequence of branches to jump to the proper precond
901	loop entry point. */
902
903	labels = (rtx ) alloca (sizeof (rtx) unroll_number);
904	for (i = 0; i < unroll_number; i++)
905	labels[i] = gen_label_rtx ();
906
907	/* Check for the case where the initial value is greater than or
908	equal to the final value. In that case, we want to execute
909	exactly one loop iteration. The code below will fail for this
910	case. This check does not apply if the loop has a NE
911	comparison at the end. */
912
913	if (loop_comparison_code != NE)
914	{
915	emit_cmp_insn (initial_value, final_value, neg_inc ? LE : GE,
916	NULL_RTX, mode, 0, 0);
917	if (neg_inc)
918	emit_jump_insn (gen_ble (labels[1]));
919	else
920	emit_jump_insn (gen_bge (labels[1]));
921	JUMP_LABEL (get_last_insn ()) = labels[1];
922	LABEL_NUSES (labels[1])++;
923	}
924
925	/* Assuming the unroll_number is 4, and the increment is 2, then
926	for a negative increment: for a positive increment:
927	diff = 0,1 precond 0 diff = 0,7 precond 0
928	diff = 2,3 precond 3 diff = 1,2 precond 1
929	diff = 4,5 precond 2 diff = 3,4 precond 2
930	diff = 6,7 precond 1 diff = 5,6 precond 3 */
931
932	/* We only need to emit (unroll_number - 1) branches here, the
933	last case just falls through to the following code. */
934
935	/* ??? This would give better code if we emitted a tree of branches
936	instead of the current linear list of branches. */
937
938	for (i = 0; i < unroll_number - 1; i++)
939	{
940	int cmp_const;
941	enum rtx_code cmp_code;
942
943	/* For negative increments, must invert the constant compared
944	against, except when comparing against zero. */
945	if (i == 0)
946	{
947	cmp_const = 0;
948	cmp_code = EQ;
949	}
950	else if (neg_inc)
951	{
952	cmp_const = unroll_number - i;
953	cmp_code = GE;
954	}
955	else
956	{
957	cmp_const = i;
958	cmp_code = LE;
959	}
960
961	emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const),
962	cmp_code, NULL_RTX, mode, 0, 0);
963
964	if (i == 0)
965	emit_jump_insn (gen_beq (labels[i]));
966	else if (neg_inc)
967	emit_jump_insn (gen_bge (labels[i]));
968	else
969	emit_jump_insn (gen_ble (labels[i]));
970	JUMP_LABEL (get_last_insn ()) = labels[i];
971	LABEL_NUSES (labels[i])++;
972	}
973
974	/* If the increment is greater than one, then we need another branch,
975	to handle other cases equivalent to 0. */
976
977	/* ??? This should be merged into the code above somehow to help
978	simplify the code here, and reduce the number of branches emitted.
979	For the negative increment case, the branch here could easily
980	be merged with the `0' case branch above. For the positive
981	increment case, it is not clear how this can be simplified. */
982
983	if (abs_inc != 1)
984	{
985	int cmp_const;
986	enum rtx_code cmp_code;
987
988	if (neg_inc)
989	{
990	cmp_const = abs_inc - 1;
991	cmp_code = LE;
992	}
993	else
994	{
995	cmp_const = abs_inc * (unroll_number - 1) + 1;
996	cmp_code = GE;
997	}
998
999	emit_cmp_insn (diff, GEN_INT (cmp_const), cmp_code, NULL_RTX,
1000	mode, 0, 0);
1001
1002	if (neg_inc)
1003	emit_jump_insn (gen_ble (labels[0]));
1004	else
1005	emit_jump_insn (gen_bge (labels[0]));
1006	JUMP_LABEL (get_last_insn ()) = labels[0];
1007	LABEL_NUSES (labels[0])++;
1008	}
1009
1010	sequence = gen_sequence ();
1011	end_sequence ();
1012	emit_insn_before (sequence, loop_start);
1013
1014	/* Only the last copy of the loop body here needs the exit
1015	test, so set copy_end to exclude the compare/branch here,
1016	and then reset it inside the loop when get to the last
1017	copy. */
1018
1019	if (GET_CODE (last_loop_insn) == BARRIER)
1020	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1021	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
1022	{
1023	#ifdef HAVE_cc0
1024	/* The immediately preceding insn is a compare which we do not
1025	want to copy. */
1026	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1027	#else
1028	/* The immediately preceding insn may not be a compare, so we
1029	must copy it. */
1030	copy_end = PREV_INSN (last_loop_insn);
1031	#endif
1032	}
1033	else
1034	abort ();
1035
1036	for (i = 1; i < unroll_number; i++)
1037	{
1038	emit_label_after (labels[unroll_number - i],
1039	PREV_INSN (loop_start));
1040
1041	bzero ((char ) map->insn_map, max_insnno sizeof (rtx));
1042	bzero ((char ) map->const_equiv_map, maxregnum sizeof (rtx));
1043	bzero ((char *) map->const_age_map,
1044	maxregnum * sizeof (unsigned));
1045	map->const_age = 0;
1046
1047	for (j = 0; j < max_labelno; j++)
1048	if (local_label[j])
1049	set_label_in_map (map, j, gen_label_rtx ());
1050
1051	for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1052	if (local_regno[j])
1053	map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1054
1055	/* The last copy needs the compare/branch insns at the end,
1056	so reset copy_end here if the loop ends with a conditional
1057	branch. */
1058
1059	if (i == unroll_number - 1)
1060	{
1061	if (GET_CODE (last_loop_insn) == BARRIER)
1062	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1063	else
1064	copy_end = last_loop_insn;
1065	}
1066
1067	/* None of the copies are the `last_iteration', so just
1068	pass zero for that parameter. */
1069	copy_loop_body (copy_start, copy_end, map, exit_label, 0,
1070	unroll_type, start_label, loop_end,
1071	loop_start, copy_end);
1072	}
1073	emit_label_after (labels[0], PREV_INSN (loop_start));
1074
1075	if (GET_CODE (last_loop_insn) == BARRIER)
1076	{
1077	insert_before = PREV_INSN (last_loop_insn);
1078	copy_end = PREV_INSN (insert_before);
1079	}
1080	else
1081	{
1082	#ifdef HAVE_cc0
1083	/* The immediately preceding insn is a compare which we do not
1084	want to copy. */
1085	insert_before = PREV_INSN (last_loop_insn);
1086	copy_end = PREV_INSN (insert_before);
1087	#else
1088	/* The immediately preceding insn may not be a compare, so we
1089	must copy it. */
1090	insert_before = last_loop_insn;
1091	copy_end = PREV_INSN (last_loop_insn);
1092	#endif
1093	}
1094
1095	/* Set unroll type to MODULO now. */
1096	unroll_type = UNROLL_MODULO;
1097	loop_preconditioned = 1;
1098	}
1099	}
1100
1101	/* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1102	the loop unless all loops are being unrolled. */
1103	if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1104	{
1105	if (loop_dump_stream)
1106	fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
1107	return;
1108	}
1109
1110	/* At this point, we are guaranteed to unroll the loop. */
1111
1112	/* For each biv and giv, determine whether it can be safely split into
1113	a different variable for each unrolled copy of the loop body.
1114	We precalculate and save this info here, since computing it is
1115	expensive.
1116
1117	Do this before deleting any instructions from the loop, so that
1118	back_branch_in_range_p will work correctly. */
1119
1120	if (splitting_not_safe)
1121	temp = 0;
1122	else
1123	temp = find_splittable_regs (unroll_type, loop_start, loop_end,
1124	end_insert_before, unroll_number);
1125
1126	/* find_splittable_regs may have created some new registers, so must
1127	reallocate the reg_map with the new larger size, and must realloc
1128	the constant maps also. */
1129
1130	maxregnum = max_reg_num ();
1131	map->reg_map = (rtx ) alloca (maxregnum sizeof (rtx));
1132
1133	init_reg_map (map, maxregnum);
1134
1135	/* Space is needed in some of the map for new registers, so new_maxregnum
1136	is an (over)estimate of how many registers will exist at the end. */
1137	new_maxregnum = maxregnum + (temp * unroll_number * 2);
1138
1139	/* Must realloc space for the constant maps, because the number of registers
1140	may have changed. */
1141
1142	map->const_equiv_map = (rtx ) alloca (new_maxregnum sizeof (rtx));
1143	map->const_age_map = (unsigned ) alloca (new_maxregnum sizeof (unsigned));
1144
1145	map->const_equiv_map_size = new_maxregnum;
1146	global_const_equiv_map = map->const_equiv_map;
1147	global_const_equiv_map_size = new_maxregnum;
1148
1149	/* Search the list of bivs and givs to find ones which need to be remapped
1150	when split, and set their reg_map entry appropriately. */
1151
1152	for (bl = loop_iv_list; bl; bl = bl->next)
1153	{
1154	if (REGNO (bl->biv->src_reg) != bl->regno)
1155	map->reg_map[bl->regno] = bl->biv->src_reg;
1156	#if 0
1157	/* Currently, non-reduced/final-value givs are never split. */
1158	for (v = bl->giv; v; v = v->next_iv)
1159	if (REGNO (v->src_reg) != bl->regno)
1160	map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1161	#endif
1162	}
1163
1164	/* Use our current register alignment and pointer flags. */
1165	map->regno_pointer_flag = regno_pointer_flag;
1166	map->regno_pointer_align = regno_pointer_align;
1167
1168	/* If the loop is being partially unrolled, and the iteration variables
1169	are being split, and are being renamed for the split, then must fix up
1170	the compare/jump instruction at the end of the loop to refer to the new
1171	registers. This compare isn't copied, so the registers used in it
1172	will never be replaced if it isn't done here. */
1173
1174	if (unroll_type == UNROLL_MODULO)
1175	{
1176	insn = NEXT_INSN (copy_end);
1177	if (GET_CODE (insn) == INSN \|\| GET_CODE (insn) == JUMP_INSN)
1178	PATTERN (insn) = remap_split_bivs (PATTERN (insn));
1179	}
1180
1181	/* For unroll_number - 1 times, make a copy of each instruction
1182	between copy_start and copy_end, and insert these new instructions
1183	before the end of the loop. */
1184
1185	for (i = 0; i < unroll_number; i++)
1186	{
1187	bzero ((char ) map->insn_map, max_insnno sizeof (rtx));
1188	bzero ((char ) map->const_equiv_map, new_maxregnum sizeof (rtx));
1189	bzero ((char ) map->const_age_map, new_maxregnum sizeof (unsigned));
1190	map->const_age = 0;
1191
1192	for (j = 0; j < max_labelno; j++)
1193	if (local_label[j])
1194	set_label_in_map (map, j, gen_label_rtx ());
1195
1196	for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1197	if (local_regno[j])
1198	map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1199
1200	/* If loop starts with a branch to the test, then fix it so that
1201	it points to the test of the first unrolled copy of the loop. */
1202	if (i == 0 && loop_start != copy_start)
1203	{
1204	insn = PREV_INSN (copy_start);
1205	pattern = PATTERN (insn);
1206
1207	tem = get_label_from_map (map,
1208	CODE_LABEL_NUMBER
1209	(XEXP (SET_SRC (pattern), 0)));
1210	SET_SRC (pattern) = gen_rtx (LABEL_REF, VOIDmode, tem);
1211
1212	/* Set the jump label so that it can be used by later loop unrolling
1213	passes. */
1214	JUMP_LABEL (insn) = tem;
1215	LABEL_NUSES (tem)++;
1216	}
1217
1218	copy_loop_body (copy_start, copy_end, map, exit_label,
1219	i == unroll_number - 1, unroll_type, start_label,
1220	loop_end, insert_before, insert_before);
1221	}
1222
1223	/* Before deleting any insns, emit a CODE_LABEL immediately after the last
1224	insn to be deleted. This prevents any runaway delete_insn call from
1225	more insns that it should, as it always stops at a CODE_LABEL. */
1226
1227	/* Delete the compare and branch at the end of the loop if completely
1228	unrolling the loop. Deleting the backward branch at the end also
1229	deletes the code label at the start of the loop. This is done at
1230	the very end to avoid problems with back_branch_in_range_p. */
1231
1232	if (unroll_type == UNROLL_COMPLETELY)
1233	safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1234	else
1235	safety_label = emit_label_after (gen_label_rtx (), copy_end);
1236
1237	/* Delete all of the original loop instructions. Don't delete the
1238	LOOP_BEG note, or the first code label in the loop. */
1239
1240	insn = NEXT_INSN (copy_start);
1241	while (insn != safety_label)
1242	{
1243	if (insn != start_label)
1244	insn = delete_insn (insn);
1245	else
1246	insn = NEXT_INSN (insn);
1247	}
1248
1249	/* Can now delete the 'safety' label emitted to protect us from runaway
1250	delete_insn calls. */
1251	if (INSN_DELETED_P (safety_label))
1252	abort ();
1253	delete_insn (safety_label);
1254
1255	/* If exit_label exists, emit it after the loop. Doing the emit here
1256	forces it to have a higher INSN_UID than any insn in the unrolled loop.
1257	This is needed so that mostly_true_jump in reorg.c will treat jumps
1258	to this loop end label correctly, i.e. predict that they are usually
1259	not taken. */
1260	if (exit_label)
1261	emit_label_after (exit_label, loop_end);
1262	}
1263
1264	/* Return true if the loop can be safely, and profitably, preconditioned
1265	so that the unrolled copies of the loop body don't need exit tests.
1266
1267	This only works if final_value, initial_value and increment can be
1268	determined, and if increment is a constant power of 2.
1269	If increment is not a power of 2, then the preconditioning modulo
1270	operation would require a real modulo instead of a boolean AND, and this
1271	is not considered `profitable'. */
1272
1273	/* ??? If the loop is known to be executed very many times, or the machine
1274	has a very cheap divide instruction, then preconditioning is a win even
1275	when the increment is not a power of 2. Use RTX_COST to compute
1276	whether divide is cheap. */
1277
1278	static int
1279	precondition_loop_p (initial_value, final_value, increment, loop_start,
1280	loop_end)
1281	rtx initial_value, final_value, *increment;
1282	rtx loop_start, loop_end;
1283	{
1284
1285	if (loop_n_iterations > 0)
1286	{
1287	*initial_value = const0_rtx;
1288	*increment = const1_rtx;
1289	*final_value = GEN_INT (loop_n_iterations);
1290
1291	if (loop_dump_stream)
1292	fprintf (loop_dump_stream,
1293	"Preconditioning: Success, number of iterations known, %d.\n",
1294	loop_n_iterations);
1295	return 1;
1296	}
1297
1298	if (loop_initial_value == 0)
1299	{
1300	if (loop_dump_stream)
1301	fprintf (loop_dump_stream,
1302	"Preconditioning: Could not find initial value.\n");
1303	return 0;
1304	}
1305	else if (loop_increment == 0)
1306	{
1307	if (loop_dump_stream)
1308	fprintf (loop_dump_stream,
1309	"Preconditioning: Could not find increment value.\n");
1310	return 0;
1311	}
1312	else if (GET_CODE (loop_increment) != CONST_INT)
1313	{
1314	if (loop_dump_stream)
1315	fprintf (loop_dump_stream,
1316	"Preconditioning: Increment not a constant.\n");
1317	return 0;
1318	}
1319	else if ((exact_log2 (INTVAL (loop_increment)) < 0)
1320	&& (exact_log2 (- INTVAL (loop_increment)) < 0))
1321	{
1322	if (loop_dump_stream)
1323	fprintf (loop_dump_stream,
1324	"Preconditioning: Increment not a constant power of 2.\n");
1325	return 0;
1326	}
1327
1328	/* Unsigned_compare and compare_dir can be ignored here, since they do
1329	not matter for preconditioning. */
1330
1331	if (loop_final_value == 0)
1332	{
1333	if (loop_dump_stream)
1334	fprintf (loop_dump_stream,
1335	"Preconditioning: EQ comparison loop.\n");
1336	return 0;
1337	}
1338
1339	/* Must ensure that final_value is invariant, so call invariant_p to
1340	check. Before doing so, must check regno against max_reg_before_loop
1341	to make sure that the register is in the range covered by invariant_p.
1342	If it isn't, then it is most likely a biv/giv which by definition are
1343	not invariant. */
1344	if ((GET_CODE (loop_final_value) == REG
1345	&& REGNO (loop_final_value) >= max_reg_before_loop)
1346	\|\| (GET_CODE (loop_final_value) == PLUS
1347	&& REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop)
1348	\|\| ! invariant_p (loop_final_value))
1349	{
1350	if (loop_dump_stream)
1351	fprintf (loop_dump_stream,
1352	"Preconditioning: Final value not invariant.\n");
1353	return 0;
1354	}
1355
1356	/* Fail for floating point values, since the caller of this function
1357	does not have code to deal with them. */
1358	if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT
1359	\|\| GET_MODE_CLASS (GET_MODE (loop_initial_value)) == MODE_FLOAT)
1360	{
1361	if (loop_dump_stream)
1362	fprintf (loop_dump_stream,
1363	"Preconditioning: Floating point final or initial value.\n");
1364	return 0;
1365	}
1366
1367	/* Now set initial_value to be the iteration_var, since that may be a
1368	simpler expression, and is guaranteed to be correct if all of the
1369	above tests succeed.
1370
1371	We can not use the initial_value as calculated, because it will be
1372	one too small for loops of the form "while (i-- > 0)". We can not
1373	emit code before the loop_skip_over insns to fix this problem as this
1374	will then give a number one too large for loops of the form
1375	"while (--i > 0)".
1376
1377	Note that all loops that reach here are entered at the top, because
1378	this function is not called if the loop starts with a jump. */
1379
1380	/* Fail if loop_iteration_var is not live before loop_start, since we need
1381	to test its value in the preconditioning code. */
1382
1383	if (uid_luid[REGNO_FIRST_UID (REGNO (loop_iteration_var))]
1384	> INSN_LUID (loop_start))
1385	{
1386	if (loop_dump_stream)
1387	fprintf (loop_dump_stream,
1388	"Preconditioning: Iteration var not live before loop start.\n");
1389	return 0;
1390	}
1391
1392	*initial_value = loop_iteration_var;
1393	*increment = loop_increment;
1394	*final_value = loop_final_value;
1395
1396	/* Success! */
1397	if (loop_dump_stream)
1398	fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1399	return 1;
1400	}
1401
1402
1403	/* All pseudo-registers must be mapped to themselves. Two hard registers
1404	must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1405	REGNUM, to avoid function-inlining specific conversions of these
1406	registers. All other hard regs can not be mapped because they may be
1407	used with different
1408	modes. */
1409
1410	static void
1411	init_reg_map (map, maxregnum)
1412	struct inline_remap *map;
1413	int maxregnum;
1414	{
1415	int i;
1416
1417	for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1418	map->reg_map[i] = regno_reg_rtx[i];
1419	/* Just clear the rest of the entries. */
1420	for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1421	map->reg_map[i] = 0;
1422
1423	map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1424	= regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1425	map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1426	= regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1427	}
1428
1429	/* Strength-reduction will often emit code for optimized biv/givs which
1430	calculates their value in a temporary register, and then copies the result
1431	to the iv. This procedure reconstructs the pattern computing the iv;
1432	verifying that all operands are of the proper form.
1433
1434	PATTERN must be the result of single_set.
1435	The return value is the amount that the giv is incremented by. */
1436
1437	static rtx
1438	calculate_giv_inc (pattern, src_insn, regno)
1439	rtx pattern, src_insn;
1440	int regno;
1441	{
1442	rtx increment;
1443	rtx increment_total = 0;
1444	int tries = 0;
1445
1446	retry:
1447	/* Verify that we have an increment insn here. First check for a plus
1448	as the set source. */
1449	if (GET_CODE (SET_SRC (pattern)) != PLUS)
1450	{
1451	/* SR sometimes computes the new giv value in a temp, then copies it
1452	to the new_reg. */
1453	src_insn = PREV_INSN (src_insn);
1454	pattern = PATTERN (src_insn);
1455	if (GET_CODE (SET_SRC (pattern)) != PLUS)
1456	abort ();
1457
1458	/* The last insn emitted is not needed, so delete it to avoid confusing
1459	the second cse pass. This insn sets the giv unnecessarily. */
1460	delete_insn (get_last_insn ());
1461	}
1462
1463	/* Verify that we have a constant as the second operand of the plus. */
1464	increment = XEXP (SET_SRC (pattern), 1);
1465	if (GET_CODE (increment) != CONST_INT)
1466	{
1467	/* SR sometimes puts the constant in a register, especially if it is
1468	too big to be an add immed operand. */
1469	src_insn = PREV_INSN (src_insn);
1470	increment = SET_SRC (PATTERN (src_insn));
1471
1472	/* SR may have used LO_SUM to compute the constant if it is too large
1473	for a load immed operand. In this case, the constant is in operand
1474	one of the LO_SUM rtx. */
1475	if (GET_CODE (increment) == LO_SUM)
1476	increment = XEXP (increment, 1);
1477	else if (GET_CODE (increment) == IOR
1478	\|\| GET_CODE (increment) == ASHIFT
1479	\|\| GET_CODE (increment) == PLUS)
1480	{
1481	/* The rs6000 port loads some constants with IOR.
1482	The alpha port loads some constants with ASHIFT and PLUS. */
1483	rtx second_part = XEXP (increment, 1);
1484	enum rtx_code code = GET_CODE (increment);
1485
1486	src_insn = PREV_INSN (src_insn);
1487	increment = SET_SRC (PATTERN (src_insn));
1488	/* Don't need the last insn anymore. */
1489	delete_insn (get_last_insn ());
1490
1491	if (GET_CODE (second_part) != CONST_INT
1492	\|\| GET_CODE (increment) != CONST_INT)
1493	abort ();
1494
1495	if (code == IOR)
1496	increment = GEN_INT (INTVAL (increment) \| INTVAL (second_part));
1497	else if (code == PLUS)
1498	increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
1499	else
1500	increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1501	}
1502
1503	if (GET_CODE (increment) != CONST_INT)
1504	abort ();
1505
1506	/* The insn loading the constant into a register is no longer needed,
1507	so delete it. */
1508	delete_insn (get_last_insn ());
1509	}
1510
1511	if (increment_total)
1512	increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1513	else
1514	increment_total = increment;
1515
1516	/* Check that the source register is the same as the register we expected
1517	to see as the source. If not, something is seriously wrong. */
1518	if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1519	\|\| REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1520	{
1521	/* Some machines (e.g. the romp), may emit two add instructions for
1522	certain constants, so lets try looking for another add immediately
1523	before this one if we have only seen one add insn so far. */
1524
1525	if (tries == 0)
1526	{
1527	tries++;
1528
1529	src_insn = PREV_INSN (src_insn);
1530	pattern = PATTERN (src_insn);
1531
1532	delete_insn (get_last_insn ());
1533
1534	goto retry;
1535	}
1536
1537	abort ();
1538	}
1539
1540	return increment_total;
1541	}
1542
1543	/* Copy REG_NOTES, except for insn references, because not all insn_map
1544	entries are valid yet. We do need to copy registers now though, because
1545	the reg_map entries can change during copying. */
1546
1547	static rtx
1548	initial_reg_note_copy (notes, map)
1549	rtx notes;
1550	struct inline_remap *map;
1551	{
1552	rtx copy;
1553
1554	if (notes == 0)
1555	return 0;
1556
1557	copy = rtx_alloc (GET_CODE (notes));
1558	PUT_MODE (copy, GET_MODE (notes));
1559
1560	if (GET_CODE (notes) == EXPR_LIST)
1561	XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map);
1562	else if (GET_CODE (notes) == INSN_LIST)
1563	/* Don't substitute for these yet. */
1564	XEXP (copy, 0) = XEXP (notes, 0);
1565	else
1566	abort ();
1567
1568	XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1569
1570	return copy;
1571	}
1572
1573	/* Fixup insn references in copied REG_NOTES. */
1574
1575	static void
1576	final_reg_note_copy (notes, map)
1577	rtx notes;
1578	struct inline_remap *map;
1579	{
1580	rtx note;
1581
1582	for (note = notes; note; note = XEXP (note, 1))
1583	if (GET_CODE (note) == INSN_LIST)
1584	XEXP (note, 0) = map->insn_map[INSN_UID (XEXP (note, 0))];
1585	}
1586
1587	/* Copy each instruction in the loop, substituting from map as appropriate.
1588	This is very similar to a loop in expand_inline_function. */
1589
1590	static void
1591	copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
1592	unroll_type, start_label, loop_end, insert_before,
1593	copy_notes_from)
1594	rtx copy_start, copy_end;
1595	struct inline_remap *map;
1596	rtx exit_label;
1597	int last_iteration;
1598	enum unroll_types unroll_type;
1599	rtx start_label, loop_end, insert_before, copy_notes_from;
1600	{
1601	rtx insn, pattern;
1602	rtx set, tem, copy;
1603	int dest_reg_was_split, i;
1604	rtx cc0_insn = 0;
1605	rtx final_label = 0;
1606	rtx giv_inc, giv_dest_reg, giv_src_reg;
1607
1608	/* If this isn't the last iteration, then map any references to the
1609	start_label to final_label. Final label will then be emitted immediately
1610	after the end of this loop body if it was ever used.
1611
1612	If this is the last iteration, then map references to the start_label
1613	to itself. */
1614	if (! last_iteration)
1615	{
1616	final_label = gen_label_rtx ();
1617	set_label_in_map (map, CODE_LABEL_NUMBER (start_label),
1618	final_label);
1619	}
1620	else
1621	set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);
1622
1623	start_sequence ();
1624
1625	insn = copy_start;
1626	do
1627	{
1628	insn = NEXT_INSN (insn);
1629
1630	map->orig_asm_operands_vector = 0;
1631
1632	switch (GET_CODE (insn))
1633	{
1634	case INSN:
1635	pattern = PATTERN (insn);
1636	copy = 0;
1637	giv_inc = 0;
1638
1639	/* Check to see if this is a giv that has been combined with
1640	some split address givs. (Combined in the sense that
1641	`combine_givs' in loop.c has put two givs in the same register.)
1642	In this case, we must search all givs based on the same biv to
1643	find the address givs. Then split the address givs.
1644	Do this before splitting the giv, since that may map the
1645	SET_DEST to a new register. */
1646
1647	if ((set = single_set (insn))
1648	&& GET_CODE (SET_DEST (set)) == REG
1649	&& addr_combined_regs[REGNO (SET_DEST (set))])
1650	{
1651	struct iv_class *bl;
1652	struct induction v, tv;
1653	int regno = REGNO (SET_DEST (set));
1654
1655	v = addr_combined_regs[REGNO (SET_DEST (set))];
1656	bl = reg_biv_class[REGNO (v->src_reg)];
1657
1658	/* Although the giv_inc amount is not needed here, we must call
1659	calculate_giv_inc here since it might try to delete the
1660	last insn emitted. If we wait until later to call it,
1661	we might accidentally delete insns generated immediately
1662	below by emit_unrolled_add. */
1663
1664	giv_inc = calculate_giv_inc (set, insn, regno);
1665
1666	/* Now find all address giv's that were combined with this
1667	giv 'v'. */
1668	for (tv = bl->giv; tv; tv = tv->next_iv)
1669	if (tv->giv_type == DEST_ADDR && tv->same == v)
1670	{
1671	int this_giv_inc;
1672
1673	/* If this DEST_ADDR giv was not split, then ignore it. */
1674	if (*tv->location != tv->dest_reg)
1675	continue;
1676
1677	/* Scale this_giv_inc if the multiplicative factors of
1678	the two givs are different. */
1679	this_giv_inc = INTVAL (giv_inc);
1680	if (tv->mult_val != v->mult_val)
1681	this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1682	* INTVAL (tv->mult_val));
1683
1684	tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1685	*tv->location = tv->dest_reg;
1686
1687	if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1688	{
1689	/* Must emit an insn to increment the split address
1690	giv. Add in the const_adjust field in case there
1691	was a constant eliminated from the address. */
1692	rtx value, dest_reg;
1693
1694	/* tv->dest_reg will be either a bare register,
1695	or else a register plus a constant. */
1696	if (GET_CODE (tv->dest_reg) == REG)
1697	dest_reg = tv->dest_reg;
1698	else
1699	dest_reg = XEXP (tv->dest_reg, 0);
1700
1701	/* Check for shared address givs, and avoid
1702	incrementing the shared pseudo reg more than
1703	once. */
1704	if (! tv->same_insn)
1705	{
1706	/* tv->dest_reg may actually be a (PLUS (REG)
1707	(CONST)) here, so we must call plus_constant
1708	to add the const_adjust amount before calling
1709	emit_unrolled_add below. */
1710	value = plus_constant (tv->dest_reg,
1711	tv->const_adjust);
1712
1713	/* The constant could be too large for an add
1714	immediate, so can't directly emit an insn
1715	here. */
1716	emit_unrolled_add (dest_reg, XEXP (value, 0),
1717	XEXP (value, 1));
1718	}
1719
1720	/* Reset the giv to be just the register again, in case
1721	it is used after the set we have just emitted.
1722	We must subtract the const_adjust factor added in
1723	above. */
1724	tv->dest_reg = plus_constant (dest_reg,
1725	- tv->const_adjust);
1726	*tv->location = tv->dest_reg;
1727	}
1728	}
1729	}
1730
1731	/* If this is a setting of a splittable variable, then determine
1732	how to split the variable, create a new set based on this split,
1733	and set up the reg_map so that later uses of the variable will
1734	use the new split variable. */
1735
1736	dest_reg_was_split = 0;
1737
1738	if ((set = single_set (insn))
1739	&& GET_CODE (SET_DEST (set)) == REG
1740	&& splittable_regs[REGNO (SET_DEST (set))])
1741	{
1742	int regno = REGNO (SET_DEST (set));
1743
1744	dest_reg_was_split = 1;
1745
1746	/* Compute the increment value for the giv, if it wasn't
1747	already computed above. */
1748
1749	if (giv_inc == 0)
1750	giv_inc = calculate_giv_inc (set, insn, regno);
1751	giv_dest_reg = SET_DEST (set);
1752	giv_src_reg = SET_DEST (set);
1753
1754	if (unroll_type == UNROLL_COMPLETELY)
1755	{
1756	/* Completely unrolling the loop. Set the induction
1757	variable to a known constant value. */
1758
1759	/* The value in splittable_regs may be an invariant
1760	value, so we must use plus_constant here. */
1761	splittable_regs[regno]
1762	= plus_constant (splittable_regs[regno], INTVAL (giv_inc));
1763
1764	if (GET_CODE (splittable_regs[regno]) == PLUS)
1765	{
1766	giv_src_reg = XEXP (splittable_regs[regno], 0);
1767	giv_inc = XEXP (splittable_regs[regno], 1);
1768	}
1769	else
1770	{
1771	/* The splittable_regs value must be a REG or a
1772	CONST_INT, so put the entire value in the giv_src_reg
1773	variable. */
1774	giv_src_reg = splittable_regs[regno];
1775	giv_inc = const0_rtx;
1776	}
1777	}
1778	else
1779	{
1780	/* Partially unrolling loop. Create a new pseudo
1781	register for the iteration variable, and set it to
1782	be a constant plus the original register. Except
1783	on the last iteration, when the result has to
1784	go back into the original iteration var register. */
1785
1786	/* Handle bivs which must be mapped to a new register
1787	when split. This happens for bivs which need their
1788	final value set before loop entry. The new register
1789	for the biv was stored in the biv's first struct
1790	induction entry by find_splittable_regs. */
1791
1792	if (regno < max_reg_before_loop
1793	&& reg_iv_type[regno] == BASIC_INDUCT)
1794	{
1795	giv_src_reg = reg_biv_class[regno]->biv->src_reg;
1796	giv_dest_reg = giv_src_reg;
1797	}
1798
1799	#if 0
1800	/* If non-reduced/final-value givs were split, then
1801	this would have to remap those givs also. See
1802	find_splittable_regs. */
1803	#endif
1804
1805	splittable_regs[regno]
1806	= GEN_INT (INTVAL (giv_inc)
1807	+ INTVAL (splittable_regs[regno]));
1808	giv_inc = splittable_regs[regno];
1809
1810	/* Now split the induction variable by changing the dest
1811	of this insn to a new register, and setting its
1812	reg_map entry to point to this new register.
1813
1814	If this is the last iteration, and this is the last insn
1815	that will update the iv, then reuse the original dest,
1816	to ensure that the iv will have the proper value when
1817	the loop exits or repeats.
1818
1819	Using splittable_regs_updates here like this is safe,
1820	because it can only be greater than one if all
1821	instructions modifying the iv are always executed in
1822	order. */
1823
1824	if (! last_iteration
1825	\|\| (splittable_regs_updates[regno]-- != 1))
1826	{
1827	tem = gen_reg_rtx (GET_MODE (giv_src_reg));
1828	giv_dest_reg = tem;
1829	map->reg_map[regno] = tem;
1830	}
1831	else
1832	map->reg_map[regno] = giv_src_reg;
1833	}
1834
1835	/* The constant being added could be too large for an add
1836	immediate, so can't directly emit an insn here. */
1837	emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
1838	copy = get_last_insn ();
1839	pattern = PATTERN (copy);
1840	}
1841	else
1842	{
1843	pattern = copy_rtx_and_substitute (pattern, map);
1844	copy = emit_insn (pattern);
1845	}
1846	REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1847
1848	#ifdef HAVE_cc0
1849	/* If this insn is setting CC0, it may need to look at
1850	the insn that uses CC0 to see what type of insn it is.
1851	In that case, the call to recog via validate_change will
1852	fail. So don't substitute constants here. Instead,
1853	do it when we emit the following insn.
1854
1855	For example, see the pyr.md file. That machine has signed and
1856	unsigned compares. The compare patterns must check the
1857	following branch insn to see which what kind of compare to
1858	emit.
1859
1860	If the previous insn set CC0, substitute constants on it as
1861	well. */
1862	if (sets_cc0_p (PATTERN (copy)) != 0)
1863	cc0_insn = copy;
1864	else
1865	{
1866	if (cc0_insn)
1867	try_constants (cc0_insn, map);
1868	cc0_insn = 0;
1869	try_constants (copy, map);
1870	}
1871	#else
1872	try_constants (copy, map);
1873	#endif
1874
1875	/* Make split induction variable constants `permanent' since we
1876	know there are no backward branches across iteration variable
1877	settings which would invalidate this. */
1878	if (dest_reg_was_split)
1879	{
1880	int regno = REGNO (SET_DEST (pattern));
1881
1882	if (regno < map->const_equiv_map_size
1883	&& map->const_age_map[regno] == map->const_age)
1884	map->const_age_map[regno] = -1;
1885	}
1886	break;
1887
1888	case JUMP_INSN:
1889	pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1890	copy = emit_jump_insn (pattern);
1891	REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1892
1893	if (JUMP_LABEL (insn) == start_label && insn == copy_end
1894	&& ! last_iteration)
1895	{
1896	/* This is a branch to the beginning of the loop; this is the
1897	last insn being copied; and this is not the last iteration.
1898	In this case, we want to change the original fall through
1899	case to be a branch past the end of the loop, and the
1900	original jump label case to fall_through. */
1901
1902	if (invert_exp (pattern, copy))
1903	{
1904	if (! redirect_exp (&pattern,
1905	get_label_from_map (map,
1906	CODE_LABEL_NUMBER
1907	(JUMP_LABEL (insn))),
1908	exit_label, copy))
1909	abort ();
1910	}
1911	else
1912	{
1913	rtx jmp;
1914	rtx lab = gen_label_rtx ();
1915	/* Can't do it by reversing the jump (probably because we
1916	couldn't reverse the conditions), so emit a new
1917	jump_insn after COPY, and redirect the jump around
1918	that. */
1919	jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
1920	jmp = emit_barrier_after (jmp);
1921	emit_label_after (lab, jmp);
1922	LABEL_NUSES (lab) = 0;
1923	if (! redirect_exp (&pattern,
1924	get_label_from_map (map,
1925	CODE_LABEL_NUMBER
1926	(JUMP_LABEL (insn))),
1927	lab, copy))
1928	abort ();
1929	}
1930	}
1931
1932	#ifdef HAVE_cc0
1933	if (cc0_insn)
1934	try_constants (cc0_insn, map);
1935	cc0_insn = 0;
1936	#endif
1937	try_constants (copy, map);
1938
1939	/* Set the jump label of COPY correctly to avoid problems with
1940	later passes of unroll_loop, if INSN had jump label set. */
1941	if (JUMP_LABEL (insn))
1942	{
1943	rtx label = 0;
1944
1945	/* Can't use the label_map for every insn, since this may be
1946	the backward branch, and hence the label was not mapped. */
1947	if ((set = single_set (copy)))
1948	{
1949	tem = SET_SRC (set);
1950	if (GET_CODE (tem) == LABEL_REF)
1951	label = XEXP (tem, 0);
1952	else if (GET_CODE (tem) == IF_THEN_ELSE)
1953	{
1954	if (XEXP (tem, 1) != pc_rtx)
1955	label = XEXP (XEXP (tem, 1), 0);
1956	else
1957	label = XEXP (XEXP (tem, 2), 0);
1958	}
1959	}
1960
1961	if (label && GET_CODE (label) == CODE_LABEL)
1962	JUMP_LABEL (copy) = label;
1963	else
1964	{
1965	/* An unrecognizable jump insn, probably the entry jump
1966	for a switch statement. This label must have been mapped,
1967	so just use the label_map to get the new jump label. */
1968	JUMP_LABEL (copy)
1969	= get_label_from_map (map,
1970	CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
1971	}
1972
1973	/* If this is a non-local jump, then must increase the label
1974	use count so that the label will not be deleted when the
1975	original jump is deleted. */
1976	LABEL_NUSES (JUMP_LABEL (copy))++;
1977	}
1978	else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
1979	\|\| GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
1980	{
1981	rtx pat = PATTERN (copy);
1982	int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
1983	int len = XVECLEN (pat, diff_vec_p);
1984	int i;
1985
1986	for (i = 0; i < len; i++)
1987	LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
1988	}
1989
1990	/* If this used to be a conditional jump insn but whose branch
1991	direction is now known, we must do something special. */
1992	if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
1993	{
1994	#ifdef HAVE_cc0
1995	/* The previous insn set cc0 for us. So delete it. */
1996	delete_insn (PREV_INSN (copy));
1997	#endif
1998
1999	/* If this is now a no-op, delete it. */
2000	if (map->last_pc_value == pc_rtx)
2001	{
2002	/* Don't let delete_insn delete the label referenced here,
2003	because we might possibly need it later for some other
2004	instruction in the loop. */
2005	if (JUMP_LABEL (copy))
2006	LABEL_NUSES (JUMP_LABEL (copy))++;
2007	delete_insn (copy);
2008	if (JUMP_LABEL (copy))
2009	LABEL_NUSES (JUMP_LABEL (copy))--;
2010	copy = 0;
2011	}
2012	else
2013	/* Otherwise, this is unconditional jump so we must put a
2014	BARRIER after it. We could do some dead code elimination
2015	here, but jump.c will do it just as well. */
2016	emit_barrier ();
2017	}
2018	break;
2019
2020	case CALL_INSN:
2021	pattern = copy_rtx_and_substitute (PATTERN (insn), map);
2022	copy = emit_call_insn (pattern);
2023	REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2024
2025	/* Because the USAGE information potentially contains objects other
2026	than hard registers, we need to copy it. */
2027	CALL_INSN_FUNCTION_USAGE (copy)
2028	= copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map);
2029
2030	#ifdef HAVE_cc0
2031	if (cc0_insn)
2032	try_constants (cc0_insn, map);
2033	cc0_insn = 0;
2034	#endif
2035	try_constants (copy, map);
2036
2037	/* Be lazy and assume CALL_INSNs clobber all hard registers. */
2038	for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2039	map->const_equiv_map[i] = 0;
2040	break;
2041
2042	case CODE_LABEL:
2043	/* If this is the loop start label, then we don't need to emit a
2044	copy of this label since no one will use it. */
2045
2046	if (insn != start_label)
2047	{
2048	copy = emit_label (get_label_from_map (map,
2049	CODE_LABEL_NUMBER (insn)));
2050	map->const_age++;
2051	}
2052	break;
2053
2054	case BARRIER:
2055	copy = emit_barrier ();
2056	break;
2057
2058	case NOTE:
2059	/* VTOP notes are valid only before the loop exit test. If placed
2060	anywhere else, loop may generate bad code. */
2061
2062	if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2063	&& (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
2064	\|\| (last_iteration && unroll_type != UNROLL_COMPLETELY)))
2065	copy = emit_note (NOTE_SOURCE_FILE (insn),
2066	NOTE_LINE_NUMBER (insn));
2067	else
2068	copy = 0;
2069	break;
2070
2071	default:
2072	abort ();
2073	break;
2074	}
2075
2076	map->insn_map[INSN_UID (insn)] = copy;
2077	}
2078	while (insn != copy_end);
2079
2080	/* Now finish coping the REG_NOTES. */
2081	insn = copy_start;
2082	do
2083	{
2084	insn = NEXT_INSN (insn);
2085	if ((GET_CODE (insn) == INSN \|\| GET_CODE (insn) == JUMP_INSN
2086	\|\| GET_CODE (insn) == CALL_INSN)
2087	&& map->insn_map[INSN_UID (insn)])
2088	final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2089	}
2090	while (insn != copy_end);
2091
2092	/* There may be notes between copy_notes_from and loop_end. Emit a copy of
2093	each of these notes here, since there may be some important ones, such as
2094	NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last
2095	iteration, because the original notes won't be deleted.
2096
2097	We can't use insert_before here, because when from preconditioning,
2098	insert_before points before the loop. We can't use copy_end, because
2099	there may be insns already inserted after it (which we don't want to
2100	copy) when not from preconditioning code. */
2101
2102	if (! last_iteration)
2103	{
2104	for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2105	{
2106	if (GET_CODE (insn) == NOTE
2107	&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
2108	emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2109	}
2110	}
2111
2112	if (final_label && LABEL_NUSES (final_label) > 0)
2113	emit_label (final_label);
2114
2115	tem = gen_sequence ();
2116	end_sequence ();
2117	emit_insn_before (tem, insert_before);
2118	}
2119
2120	/* Emit an insn, using the expand_binop to ensure that a valid insn is
2121	emitted. This will correctly handle the case where the increment value
2122	won't fit in the immediate field of a PLUS insns. */
2123
2124	void
2125	emit_unrolled_add (dest_reg, src_reg, increment)
2126	rtx dest_reg, src_reg, increment;
2127	{
2128	rtx result;
2129
2130	result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment,
2131	dest_reg, 0, OPTAB_LIB_WIDEN);
2132
2133	if (dest_reg != result)
2134	emit_move_insn (dest_reg, result);
2135	}
2136
2137	/* Searches the insns between INSN and LOOP_END. Returns 1 if there
2138	is a backward branch in that range that branches to somewhere between
2139	LOOP_START and INSN. Returns 0 otherwise. */
2140
2141	/* ??? This is quadratic algorithm. Could be rewritten to be linear.
2142	In practice, this is not a problem, because this function is seldom called,
2143	and uses a negligible amount of CPU time on average. */
2144
2145	int
2146	back_branch_in_range_p (insn, loop_start, loop_end)
2147	rtx insn;
2148	rtx loop_start, loop_end;
2149	{
2150	rtx p, q, target_insn;
2151	rtx orig_loop_end = loop_end;
2152
2153	/* Stop before we get to the backward branch at the end of the loop. */
2154	loop_end = prev_nonnote_insn (loop_end);
2155	if (GET_CODE (loop_end) == BARRIER)
2156	loop_end = PREV_INSN (loop_end);
2157
2158	/* Check in case insn has been deleted, search forward for first non
2159	deleted insn following it. */
2160	while (INSN_DELETED_P (insn))
2161	insn = NEXT_INSN (insn);
2162
2163	/* Check for the case where insn is the last insn in the loop. Deal
2164	with the case where INSN was a deleted loop test insn, in which case
2165	it will now be the NOTE_LOOP_END. */
2166	if (insn == loop_end \|\| insn == orig_loop_end)
2167	return 0;
2168
2169	for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2170	{
2171	if (GET_CODE (p) == JUMP_INSN)
2172	{
2173	target_insn = JUMP_LABEL (p);
2174
2175	/* Search from loop_start to insn, to see if one of them is
2176	the target_insn. We can't use INSN_LUID comparisons here,
2177	since insn may not have an LUID entry. */
2178	for (q = loop_start; q != insn; q = NEXT_INSN (q))
2179	if (q == target_insn)
2180	return 1;
2181	}
2182	}
2183
2184	return 0;
2185	}
2186
2187	/* Try to generate the simplest rtx for the expression
2188	(PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2189	value of giv's. */
2190
2191	static rtx
2192	fold_rtx_mult_add (mult1, mult2, add1, mode)
2193	rtx mult1, mult2, add1;
2194	enum machine_mode mode;
2195	{
2196	rtx temp, mult_res;
2197	rtx result;
2198
2199	/* The modes must all be the same. This should always be true. For now,
2200	check to make sure. */
2201	if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2202	\|\| (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2203	\|\| (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2204	abort ();
2205
2206	/* Ensure that if at least one of mult1/mult2 are constant, then mult2
2207	will be a constant. */
2208	if (GET_CODE (mult1) == CONST_INT)
2209	{
2210	temp = mult2;
2211	mult2 = mult1;
2212	mult1 = temp;
2213	}
2214
2215	mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2216	if (! mult_res)
2217	mult_res = gen_rtx (MULT, mode, mult1, mult2);
2218
2219	/* Again, put the constant second. */
2220	if (GET_CODE (add1) == CONST_INT)
2221	{
2222	temp = add1;
2223	add1 = mult_res;
2224	mult_res = temp;
2225	}
2226
2227	result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2228	if (! result)
2229	result = gen_rtx (PLUS, mode, add1, mult_res);
2230
2231	return result;
2232	}
2233
2234	/* Searches the list of induction struct's for the biv BL, to try to calculate
2235	the total increment value for one iteration of the loop as a constant.
2236
2237	Returns the increment value as an rtx, simplified as much as possible,
2238	if it can be calculated. Otherwise, returns 0. */
2239
2240	rtx
2241	biv_total_increment (bl, loop_start, loop_end)
2242	struct iv_class *bl;
2243	rtx loop_start, loop_end;
2244	{
2245	struct induction *v;
2246	rtx result;
2247
2248	/* For increment, must check every instruction that sets it. Each
2249	instruction must be executed only once each time through the loop.
2250	To verify this, we check that the the insn is always executed, and that
2251	there are no backward branches after the insn that branch to before it.
2252	Also, the insn must have a mult_val of one (to make sure it really is
2253	an increment). */
2254
2255	result = const0_rtx;
2256	for (v = bl->biv; v; v = v->next_iv)
2257	{
2258	if (v->always_computable && v->mult_val == const1_rtx
2259	&& ! back_branch_in_range_p (v->insn, loop_start, loop_end))
2260	result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2261	else
2262	return 0;
2263	}
2264
2265	return result;
2266	}
2267
2268	/* Determine the initial value of the iteration variable, and the amount
2269	that it is incremented each loop. Use the tables constructed by
2270	the strength reduction pass to calculate these values.
2271
2272	Initial_value and/or increment are set to zero if their values could not
2273	be calculated. */
2274
2275	static void
2276	iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
2277	rtx iteration_var, initial_value, increment;
2278	rtx loop_start, loop_end;
2279	{
2280	struct iv_class *bl;
2281	struct induction v, b;
2282
2283	/* Clear the result values, in case no answer can be found. */
2284	*initial_value = 0;
2285	*increment = 0;
2286
2287	/* The iteration variable can be either a giv or a biv. Check to see
2288	which it is, and compute the variable's initial value, and increment
2289	value if possible. */
2290
2291	/* If this is a new register, can't handle it since we don't have any
2292	reg_iv_type entry for it. */
2293	if (REGNO (iteration_var) >= max_reg_before_loop)
2294	{
2295	if (loop_dump_stream)
2296	fprintf (loop_dump_stream,
2297	"Loop unrolling: No reg_iv_type entry for iteration var.\n");
2298	return;
2299	}
2300
2301	/* Reject iteration variables larger than the host wide int size, since they
2302	could result in a number of iterations greater than the range of our
2303	`unsigned HOST_WIDE_INT' variable loop_n_iterations. */
2304	else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
2305	> HOST_BITS_PER_WIDE_INT))
2306	{
2307	if (loop_dump_stream)
2308	fprintf (loop_dump_stream,
2309	"Loop unrolling: Iteration var rejected because mode too large.\n");
2310	return;
2311	}
2312	else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
2313	{
2314	if (loop_dump_stream)
2315	fprintf (loop_dump_stream,
2316	"Loop unrolling: Iteration var not an integer.\n");
2317	return;
2318	}
2319	else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT)
2320	{
2321	/* Grab initial value, only useful if it is a constant. */
2322	bl = reg_biv_class[REGNO (iteration_var)];
2323	*initial_value = bl->initial_value;
2324
2325	*increment = biv_total_increment (bl, loop_start, loop_end);
2326	}
2327	else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT)
2328	{
2329	#if 1
2330	/* ??? The code below does not work because the incorrect number of
2331	iterations is calculated when the biv is incremented after the giv
2332	is set (which is the usual case). This can probably be accounted
2333	for by biasing the initial_value by subtracting the amount of the
2334	increment that occurs between the giv set and the giv test. However,
2335	a giv as an iterator is very rare, so it does not seem worthwhile
2336	to handle this. */
2337	/* ??? An example failure is: i = 6; do {;} while (i++ < 9). */
2338	if (loop_dump_stream)
2339	fprintf (loop_dump_stream,
2340	"Loop unrolling: Giv iterators are not handled.\n");
2341	return;
2342	#else
2343	/* Initial value is mult_val times the biv's initial value plus
2344	add_val. Only useful if it is a constant. */
2345	v = reg_iv_info[REGNO (iteration_var)];
2346	bl = reg_biv_class[REGNO (v->src_reg)];
2347	*initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value,
2348	v->add_val, v->mode);
2349
2350	/* Increment value is mult_val times the increment value of the biv. */
2351
2352	*increment = biv_total_increment (bl, loop_start, loop_end);
2353	if (*increment)
2354	increment = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2355	v->mode);
2356	#endif
2357	}
2358	else
2359	{
2360	if (loop_dump_stream)
2361	fprintf (loop_dump_stream,
2362	"Loop unrolling: Not basic or general induction var.\n");
2363	return;
2364	}
2365	}
2366
2367	/* Calculate the approximate final value of the iteration variable
2368	which has an loop exit test with code COMPARISON_CODE and comparison value
2369	of COMPARISON_VALUE. Also returns an indication of whether the comparison
2370	was signed or unsigned, and the direction of the comparison. This info is
2371	needed to calculate the number of loop iterations. */
2372
2373	static rtx
2374	approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir)
2375	enum rtx_code comparison_code;
2376	rtx comparison_value;
2377	int *unsigned_p;
2378	int *compare_dir;
2379	{
2380	/* Calculate the final value of the induction variable.
2381	The exact final value depends on the branch operator, and increment sign.
2382	This is only an approximate value. It will be wrong if the iteration
2383	variable is not incremented by one each time through the loop, and
2384	approx final value - start value % increment != 0. */
2385
2386	*unsigned_p = 0;
2387	switch (comparison_code)
2388	{
2389	case LEU:
2390	*unsigned_p = 1;
2391	case LE:
2392	*compare_dir = 1;
2393	return plus_constant (comparison_value, 1);
2394	case GEU:
2395	*unsigned_p = 1;
2396	case GE:
2397	*compare_dir = -1;
2398	return plus_constant (comparison_value, -1);
2399	case EQ:
2400	/* Can not calculate a final value for this case. */
2401	*compare_dir = 0;
2402	return 0;
2403	case LTU:
2404	*unsigned_p = 1;
2405	case LT:
2406	*compare_dir = 1;
2407	return comparison_value;
2408	break;
2409	case GTU:
2410	*unsigned_p = 1;
2411	case GT:
2412	*compare_dir = -1;
2413	return comparison_value;
2414	case NE:
2415	*compare_dir = 0;
2416	return comparison_value;
2417	default:
2418	abort ();
2419	}
2420	}
2421
2422	/* For each biv and giv, determine whether it can be safely split into
2423	a different variable for each unrolled copy of the loop body. If it
2424	is safe to split, then indicate that by saving some useful info
2425	in the splittable_regs array.
2426
2427	If the loop is being completely unrolled, then splittable_regs will hold
2428	the current value of the induction variable while the loop is unrolled.
2429	It must be set to the initial value of the induction variable here.
2430	Otherwise, splittable_regs will hold the difference between the current
2431	value of the induction variable and the value the induction variable had
2432	at the top of the loop. It must be set to the value 0 here.
2433
2434	Returns the total number of instructions that set registers that are
2435	splittable. */
2436
2437	/* ?? If the loop is only unrolled twice, then most of the restrictions to
2438	constant values are unnecessary, since we can easily calculate increment
2439	values in this case even if nothing is constant. The increment value
2440	should not involve a multiply however. */
2441
2442	/* ?? Even if the biv/giv increment values aren't constant, it may still
2443	be beneficial to split the variable if the loop is only unrolled a few
2444	times, since multiplies by small integers (1,2,3,4) are very cheap. */
2445
2446	static int
2447	find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
2448	unroll_number)
2449	enum unroll_types unroll_type;
2450	rtx loop_start, loop_end;
2451	rtx end_insert_before;
2452	int unroll_number;
2453	{
2454	struct iv_class *bl;
2455	struct induction *v;
2456	rtx increment, tem;
2457	rtx biv_final_value;
2458	int biv_splittable;
2459	int result = 0;
2460
2461	for (bl = loop_iv_list; bl; bl = bl->next)
2462	{
2463	/* Biv_total_increment must return a constant value,
2464	otherwise we can not calculate the split values. */
2465
2466	increment = biv_total_increment (bl, loop_start, loop_end);
2467	if (! increment \|\| GET_CODE (increment) != CONST_INT)
2468	continue;
2469
2470	/* The loop must be unrolled completely, or else have a known number
2471	of iterations and only one exit, or else the biv must be dead
2472	outside the loop, or else the final value must be known. Otherwise,
2473	it is unsafe to split the biv since it may not have the proper
2474	value on loop exit. */
2475
2476	/* loop_number_exit_count is non-zero if the loop has an exit other than
2477	a fall through at the end. */
2478
2479	biv_splittable = 1;
2480	biv_final_value = 0;
2481	if (unroll_type != UNROLL_COMPLETELY
2482	&& (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2483	\|\| unroll_type == UNROLL_NAIVE)
2484	&& (uid_luid[REGNO_LAST_UID (bl->regno)] >= INSN_LUID (loop_end)
2485	\|\| ! bl->init_insn
2486	\|\| INSN_UID (bl->init_insn) >= max_uid_for_loop
2487	\|\| (uid_luid[REGNO_FIRST_UID (bl->regno)]
2488	< INSN_LUID (bl->init_insn))
2489	\|\| reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2490	&& ! (biv_final_value = final_biv_value (bl, loop_start, loop_end)))
2491	biv_splittable = 0;
2492
2493	/* If any of the insns setting the BIV don't do so with a simple
2494	PLUS, we don't know how to split it. */
2495	for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2496	if ((tem = single_set (v->insn)) == 0
2497	\|\| GET_CODE (SET_DEST (tem)) != REG
2498	\|\| REGNO (SET_DEST (tem)) != bl->regno
2499	\|\| GET_CODE (SET_SRC (tem)) != PLUS)
2500	biv_splittable = 0;
2501
2502	/* If final value is non-zero, then must emit an instruction which sets
2503	the value of the biv to the proper value. This is done after
2504	handling all of the givs, since some of them may need to use the
2505	biv's value in their initialization code. */
2506
2507	/* This biv is splittable. If completely unrolling the loop, save
2508	the biv's initial value. Otherwise, save the constant zero. */
2509
2510	if (biv_splittable == 1)
2511	{
2512	if (unroll_type == UNROLL_COMPLETELY)
2513	{
2514	/* If the initial value of the biv is itself (i.e. it is too
2515	complicated for strength_reduce to compute), or is a hard
2516	register, or it isn't invariant, then we must create a new
2517	pseudo reg to hold the initial value of the biv. */
2518
2519	if (GET_CODE (bl->initial_value) == REG
2520	&& (REGNO (bl->initial_value) == bl->regno
2521	\|\| REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
2522	\|\| ! invariant_p (bl->initial_value)))
2523	{
2524	rtx tem = gen_reg_rtx (bl->biv->mode);
2525
2526	emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2527	loop_start);
2528
2529	if (loop_dump_stream)
2530	fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
2531	bl->regno, REGNO (tem));
2532
2533	splittable_regs[bl->regno] = tem;
2534	}
2535	else
2536	splittable_regs[bl->regno] = bl->initial_value;
2537	}
2538	else
2539	splittable_regs[bl->regno] = const0_rtx;
2540
2541	/* Save the number of instructions that modify the biv, so that
2542	we can treat the last one specially. */
2543
2544	splittable_regs_updates[bl->regno] = bl->biv_count;
2545	result += bl->biv_count;
2546
2547	if (loop_dump_stream)
2548	fprintf (loop_dump_stream,
2549	"Biv %d safe to split.\n", bl->regno);
2550	}
2551
2552	/* Check every giv that depends on this biv to see whether it is
2553	splittable also. Even if the biv isn't splittable, givs which
2554	depend on it may be splittable if the biv is live outside the
2555	loop, and the givs aren't. */
2556
2557	result += find_splittable_givs (bl, unroll_type, loop_start, loop_end,
2558	increment, unroll_number);
2559
2560	/* If final value is non-zero, then must emit an instruction which sets
2561	the value of the biv to the proper value. This is done after
2562	handling all of the givs, since some of them may need to use the
2563	biv's value in their initialization code. */
2564	if (biv_final_value)
2565	{
2566	/* If the loop has multiple exits, emit the insns before the
2567	loop to ensure that it will always be executed no matter
2568	how the loop exits. Otherwise emit the insn after the loop,
2569	since this is slightly more efficient. */
2570	if (! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
2571	emit_insn_before (gen_move_insn (bl->biv->src_reg,
2572	biv_final_value),
2573	end_insert_before);
2574	else
2575	{
2576	/* Create a new register to hold the value of the biv, and then
2577	set the biv to its final value before the loop start. The biv
2578	is set to its final value before loop start to ensure that
2579	this insn will always be executed, no matter how the loop
2580	exits. */
2581	rtx tem = gen_reg_rtx (bl->biv->mode);
2582	emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2583	loop_start);
2584	emit_insn_before (gen_move_insn (bl->biv->src_reg,
2585	biv_final_value),
2586	loop_start);
2587
2588	if (loop_dump_stream)
2589	fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2590	REGNO (bl->biv->src_reg), REGNO (tem));
2591
2592	/* Set up the mapping from the original biv register to the new
2593	register. */
2594	bl->biv->src_reg = tem;
2595	}
2596	}
2597	}
2598	return result;
2599	}
2600
2601	/* Return 1 if the first and last unrolled copy of the address giv V is valid
2602	for the instruction that is using it. Do not make any changes to that
2603	instruction. */
2604
2605	static int
2606	verify_addresses (v, giv_inc, unroll_number)
2607	struct induction *v;
2608	rtx giv_inc;
2609	int unroll_number;
2610	{
2611	int ret = 1;
2612	rtx orig_addr = *v->location;
2613	rtx last_addr = plus_constant (v->dest_reg,
2614	INTVAL (giv_inc) * (unroll_number - 1));
2615
2616	/* First check to see if either address would fail. */
2617	if (! validate_change (v->insn, v->location, v->dest_reg, 0)
2618	\|\| ! validate_change (v->insn, v->location, last_addr, 0))
2619	ret = 0;
2620
2621	/* Now put things back the way they were before. This will always
2622	succeed. */
2623	validate_change (v->insn, v->location, orig_addr, 0);
2624
2625	return ret;
2626	}
2627
2628	/* For every giv based on the biv BL, check to determine whether it is
2629	splittable. This is a subroutine to find_splittable_regs ().
2630
2631	Return the number of instructions that set splittable registers. */
2632
2633	static int
2634	find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
2635	unroll_number)
2636	struct iv_class *bl;
2637	enum unroll_types unroll_type;
2638	rtx loop_start, loop_end;
2639	rtx increment;
2640	int unroll_number;
2641	{
2642	struct induction v, v2;
2643	rtx final_value;
2644	rtx tem;
2645	int result = 0;
2646
2647	/* Scan the list of givs, and set the same_insn field when there are
2648	multiple identical givs in the same insn. */
2649	for (v = bl->giv; v; v = v->next_iv)
2650	for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2651	if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2652	&& ! v2->same_insn)
2653	v2->same_insn = v;
2654
2655	for (v = bl->giv; v; v = v->next_iv)
2656	{
2657	rtx giv_inc, value;
2658
2659	/* Only split the giv if it has already been reduced, or if the loop is
2660	being completely unrolled. */
2661	if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2662	continue;
2663
2664	/* The giv can be split if the insn that sets the giv is executed once
2665	and only once on every iteration of the loop. */
2666	/* An address giv can always be split. v->insn is just a use not a set,
2667	and hence it does not matter whether it is always executed. All that
2668	matters is that all the biv increments are always executed, and we
2669	won't reach here if they aren't. */
2670	if (v->giv_type != DEST_ADDR
2671	&& (! v->always_computable
2672	\|\| back_branch_in_range_p (v->insn, loop_start, loop_end)))
2673	continue;
2674
2675	/* The giv increment value must be a constant. */
2676	giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2677	v->mode);
2678	if (! giv_inc \|\| GET_CODE (giv_inc) != CONST_INT)
2679	continue;
2680
2681	/* The loop must be unrolled completely, or else have a known number of
2682	iterations and only one exit, or else the giv must be dead outside
2683	the loop, or else the final value of the giv must be known.
2684	Otherwise, it is not safe to split the giv since it may not have the
2685	proper value on loop exit. */
2686
2687	/* The used outside loop test will fail for DEST_ADDR givs. They are
2688	never used outside the loop anyways, so it is always safe to split a
2689	DEST_ADDR giv. */
2690
2691	final_value = 0;
2692	if (unroll_type != UNROLL_COMPLETELY
2693	&& (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2694	\|\| unroll_type == UNROLL_NAIVE)
2695	&& v->giv_type != DEST_ADDR
2696	/* The next part is true if the pseudo is used outside the loop.
2697	We assume that this is true for any pseudo created after loop
2698	starts, because we don't have a reg_n_info entry for them. */
2699	&& (REGNO (v->dest_reg) >= max_reg_before_loop
2700	\|\| (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
2701	/* Check for the case where the pseudo is set by a shift/add
2702	sequence, in which case the first insn setting the pseudo
2703	is the first insn of the shift/add sequence. */
2704	&& (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2705	\|\| (REGNO_FIRST_UID (REGNO (v->dest_reg))
2706	!= INSN_UID (XEXP (tem, 0)))))
2707	/* Line above always fails if INSN was moved by loop opt. */
2708	\|\| (uid_luid[REGNO_LAST_UID (REGNO (v->dest_reg))]
2709	>= INSN_LUID (loop_end)))
2710	&& ! (final_value = v->final_value))
2711	continue;
2712
2713	#if 0
2714	/* Currently, non-reduced/final-value givs are never split. */
2715	/* Should emit insns after the loop if possible, as the biv final value
2716	code below does. */
2717
2718	/* If the final value is non-zero, and the giv has not been reduced,
2719	then must emit an instruction to set the final value. */
2720	if (final_value && !v->new_reg)
2721	{
2722	/* Create a new register to hold the value of the giv, and then set
2723	the giv to its final value before the loop start. The giv is set
2724	to its final value before loop start to ensure that this insn
2725	will always be executed, no matter how we exit. */
2726	tem = gen_reg_rtx (v->mode);
2727	emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
2728	emit_insn_before (gen_move_insn (v->dest_reg, final_value),
2729	loop_start);
2730
2731	if (loop_dump_stream)
2732	fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2733	REGNO (v->dest_reg), REGNO (tem));
2734
2735	v->src_reg = tem;
2736	}
2737	#endif
2738
2739	/* This giv is splittable. If completely unrolling the loop, save the
2740	giv's initial value. Otherwise, save the constant zero for it. */
2741
2742	if (unroll_type == UNROLL_COMPLETELY)
2743	{
2744	/* It is not safe to use bl->initial_value here, because it may not
2745	be invariant. It is safe to use the initial value stored in
2746	the splittable_regs array if it is set. In rare cases, it won't
2747	be set, so then we do exactly the same thing as
2748	find_splittable_regs does to get a safe value. */
2749	rtx biv_initial_value;
2750
2751	if (splittable_regs[bl->regno])
2752	biv_initial_value = splittable_regs[bl->regno];
2753	else if (GET_CODE (bl->initial_value) != REG
2754	\|\| (REGNO (bl->initial_value) != bl->regno
2755	&& REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2756	biv_initial_value = bl->initial_value;
2757	else
2758	{
2759	rtx tem = gen_reg_rtx (bl->biv->mode);
2760
2761	emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2762	loop_start);
2763	biv_initial_value = tem;
2764	}
2765	value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2766	v->add_val, v->mode);
2767	}
2768	else
2769	value = const0_rtx;
2770
2771	if (v->new_reg)
2772	{
2773	/* If a giv was combined with another giv, then we can only split
2774	this giv if the giv it was combined with was reduced. This
2775	is because the value of v->new_reg is meaningless in this
2776	case. */
2777	if (v->same && ! v->same->new_reg)
2778	{
2779	if (loop_dump_stream)
2780	fprintf (loop_dump_stream,
2781	"giv combined with unreduced giv not split.\n");
2782	continue;
2783	}
2784	/* If the giv is an address destination, it could be something other
2785	than a simple register, these have to be treated differently. */
2786	else if (v->giv_type == DEST_REG)
2787	{
2788	/* If value is not a constant, register, or register plus
2789	constant, then compute its value into a register before
2790	loop start. This prevents invalid rtx sharing, and should
2791	generate better code. We can use bl->initial_value here
2792	instead of splittable_regs[bl->regno] because this code
2793	is going before the loop start. */
2794	if (unroll_type == UNROLL_COMPLETELY
2795	&& GET_CODE (value) != CONST_INT
2796	&& GET_CODE (value) != REG
2797	&& (GET_CODE (value) != PLUS
2798	\|\| GET_CODE (XEXP (value, 0)) != REG
2799	\|\| GET_CODE (XEXP (value, 1)) != CONST_INT))
2800	{
2801	rtx tem = gen_reg_rtx (v->mode);
2802	emit_iv_add_mult (bl->initial_value, v->mult_val,
2803	v->add_val, tem, loop_start);
2804	value = tem;
2805	}
2806
2807	splittable_regs[REGNO (v->new_reg)] = value;
2808	}
2809	else
2810	{
2811	/* Splitting address givs is useful since it will often allow us
2812	to eliminate some increment insns for the base giv as
2813	unnecessary. */
2814
2815	/* If the addr giv is combined with a dest_reg giv, then all
2816	references to that dest reg will be remapped, which is NOT
2817	what we want for split addr regs. We always create a new
2818	register for the split addr giv, just to be safe. */
2819
2820	/* ??? If there are multiple address givs which have been
2821	combined with the same dest_reg giv, then we may only need
2822	one new register for them. Pulling out constants below will
2823	catch some of the common cases of this. Currently, I leave
2824	the work of simplifying multiple address givs to the
2825	following cse pass. */
2826
2827	/* As a special case, if we have multiple identical address givs
2828	within a single instruction, then we do use a single pseudo
2829	reg for both. This is necessary in case one is a match_dup
2830	of the other. */
2831
2832	v->const_adjust = 0;
2833
2834	if (v->same_insn)
2835	{
2836	v->dest_reg = v->same_insn->dest_reg;
2837	if (loop_dump_stream)
2838	fprintf (loop_dump_stream,
2839	"Sharing address givs in insn %d\n",
2840	INSN_UID (v->insn));
2841	}
2842	else if (unroll_type != UNROLL_COMPLETELY)
2843	{
2844	/* If not completely unrolling the loop, then create a new
2845	register to hold the split value of the DEST_ADDR giv.
2846	Emit insn to initialize its value before loop start. */
2847	tem = gen_reg_rtx (v->mode);
2848
2849	/* If the address giv has a constant in its new_reg value,
2850	then this constant can be pulled out and put in value,
2851	instead of being part of the initialization code. */
2852
2853	if (GET_CODE (v->new_reg) == PLUS
2854	&& GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT)
2855	{
2856	v->dest_reg
2857	= plus_constant (tem, INTVAL (XEXP (v->new_reg,1)));
2858
2859	/* Only succeed if this will give valid addresses.
2860	Try to validate both the first and the last
2861	address resulting from loop unrolling, if
2862	one fails, then can't do const elim here. */
2863	if (verify_addresses (v, giv_inc, unroll_number))
2864	{
2865	/* Save the negative of the eliminated const, so
2866	that we can calculate the dest_reg's increment
2867	value later. */
2868	v->const_adjust = - INTVAL (XEXP (v->new_reg, 1));
2869
2870	v->new_reg = XEXP (v->new_reg, 0);
2871	if (loop_dump_stream)
2872	fprintf (loop_dump_stream,
2873	"Eliminating constant from giv %d\n",
2874	REGNO (tem));
2875	}
2876	else
2877	v->dest_reg = tem;
2878	}
2879	else
2880	v->dest_reg = tem;
2881
2882	/* If the address hasn't been checked for validity yet, do so
2883	now, and fail completely if either the first or the last
2884	unrolled copy of the address is not a valid address
2885	for the instruction that uses it. */
2886	if (v->dest_reg == tem
2887	&& ! verify_addresses (v, giv_inc, unroll_number))
2888	{
2889	if (loop_dump_stream)
2890	fprintf (loop_dump_stream,
2891	"Invalid address for giv at insn %d\n",
2892	INSN_UID (v->insn));
2893	continue;
2894	}
2895
2896	/* To initialize the new register, just move the value of
2897	new_reg into it. This is not guaranteed to give a valid
2898	instruction on machines with complex addressing modes.
2899	If we can't recognize it, then delete it and emit insns
2900	to calculate the value from scratch. */
2901	emit_insn_before (gen_rtx (SET, VOIDmode, tem,
2902	copy_rtx (v->new_reg)),
2903	loop_start);
2904	if (recog_memoized (PREV_INSN (loop_start)) < 0)
2905	{
2906	rtx sequence, ret;
2907
2908	/* We can't use bl->initial_value to compute the initial
2909	value, because the loop may have been preconditioned.
2910	We must calculate it from NEW_REG. Try using
2911	force_operand instead of emit_iv_add_mult. */
2912	delete_insn (PREV_INSN (loop_start));
2913
2914	start_sequence ();
2915	ret = force_operand (v->new_reg, tem);
2916	if (ret != tem)
2917	emit_move_insn (tem, ret);
2918	sequence = gen_sequence ();
2919	end_sequence ();
2920	emit_insn_before (sequence, loop_start);
2921
2922	if (loop_dump_stream)
2923	fprintf (loop_dump_stream,
2924	"Invalid init insn, rewritten.\n");
2925	}
2926	}
2927	else
2928	{
2929	v->dest_reg = value;
2930
2931	/* Check the resulting address for validity, and fail
2932	if the resulting address would be invalid. */
2933	if (! verify_addresses (v, giv_inc, unroll_number))
2934	{
2935	if (loop_dump_stream)
2936	fprintf (loop_dump_stream,
2937	"Invalid address for giv at insn %d\n",
2938	INSN_UID (v->insn));
2939	continue;
2940	}
2941	}
2942
2943	/* Store the value of dest_reg into the insn. This sharing
2944	will not be a problem as this insn will always be copied
2945	later. */
2946
2947	*v->location = v->dest_reg;
2948
2949	/* If this address giv is combined with a dest reg giv, then
2950	save the base giv's induction pointer so that we will be
2951	able to handle this address giv properly. The base giv
2952	itself does not have to be splittable. */
2953
2954	if (v->same && v->same->giv_type == DEST_REG)
2955	addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
2956
2957	if (GET_CODE (v->new_reg) == REG)
2958	{
2959	/* This giv maybe hasn't been combined with any others.
2960	Make sure that it's giv is marked as splittable here. */
2961
2962	splittable_regs[REGNO (v->new_reg)] = value;
2963
2964	/* Make it appear to depend upon itself, so that the
2965	giv will be properly split in the main loop above. */
2966	if (! v->same)
2967	{
2968	v->same = v;
2969	addr_combined_regs[REGNO (v->new_reg)] = v;
2970	}
2971	}
2972
2973	if (loop_dump_stream)
2974	fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
2975	}
2976	}
2977	else
2978	{
2979	#if 0
2980	/* Currently, unreduced giv's can't be split. This is not too much
2981	of a problem since unreduced giv's are not live across loop
2982	iterations anyways. When unrolling a loop completely though,
2983	it makes sense to reduce&split givs when possible, as this will
2984	result in simpler instructions, and will not require that a reg
2985	be live across loop iterations. */
2986
2987	splittable_regs[REGNO (v->dest_reg)] = value;
2988	fprintf (stderr, "Giv %d at insn %d not reduced\n",
2989	REGNO (v->dest_reg), INSN_UID (v->insn));
2990	#else
2991	continue;
2992	#endif
2993	}
2994
2995	/* Unreduced givs are only updated once by definition. Reduced givs
2996	are updated as many times as their biv is. Mark it so if this is
2997	a splittable register. Don't need to do anything for address givs
2998	where this may not be a register. */
2999
3000	if (GET_CODE (v->new_reg) == REG)
3001	{
3002	int count = 1;
3003	if (! v->ignore)
3004	count = reg_biv_class[REGNO (v->src_reg)]->biv_count;
3005
3006	splittable_regs_updates[REGNO (v->new_reg)] = count;
3007	}
3008
3009	result++;
3010
3011	if (loop_dump_stream)
3012	{
3013	int regnum;
3014
3015	if (GET_CODE (v->dest_reg) == CONST_INT)
3016	regnum = -1;
3017	else if (GET_CODE (v->dest_reg) != REG)
3018	regnum = REGNO (XEXP (v->dest_reg, 0));
3019	else
3020	regnum = REGNO (v->dest_reg);
3021	fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
3022	regnum, INSN_UID (v->insn));
3023	}
3024	}
3025
3026	return result;
3027	}
3028
3029	/* Try to prove that the register is dead after the loop exits. Trace every
3030	loop exit looking for an insn that will always be executed, which sets
3031	the register to some value, and appears before the first use of the register
3032	is found. If successful, then return 1, otherwise return 0. */
3033
3034	/* ?? Could be made more intelligent in the handling of jumps, so that
3035	it can search past if statements and other similar structures. */
3036
3037	static int
3038	reg_dead_after_loop (reg, loop_start, loop_end)
3039	rtx reg, loop_start, loop_end;
3040	{
3041	rtx insn, label;
3042	enum rtx_code code;
3043	int jump_count = 0;
3044	int label_count = 0;
3045	int this_loop_num = uid_loop_num[INSN_UID (loop_start)];
3046
3047	/* In addition to checking all exits of this loop, we must also check
3048	all exits of inner nested loops that would exit this loop. We don't
3049	have any way to identify those, so we just give up if there are any
3050	such inner loop exits. */
3051
3052	for (label = loop_number_exit_labels[this_loop_num]; label;
3053	label = LABEL_NEXTREF (label))
3054	label_count++;
3055
3056	if (label_count != loop_number_exit_count[this_loop_num])
3057	return 0;
3058
3059	/* HACK: Must also search the loop fall through exit, create a label_ref
3060	here which points to the loop_end, and append the loop_number_exit_labels
3061	list to it. */
3062	label = gen_rtx (LABEL_REF, VOIDmode, loop_end);
3063	LABEL_NEXTREF (label) = loop_number_exit_labels[this_loop_num];
3064
3065	for ( ; label; label = LABEL_NEXTREF (label))
3066	{
3067	/* Succeed if find an insn which sets the biv or if reach end of
3068	function. Fail if find an insn that uses the biv, or if come to
3069	a conditional jump. */
3070
3071	insn = NEXT_INSN (XEXP (label, 0));
3072	while (insn)
3073	{
3074	code = GET_CODE (insn);
3075	if (GET_RTX_CLASS (code) == 'i')
3076	{
3077	rtx set;
3078
3079	if (reg_referenced_p (reg, PATTERN (insn)))
3080	return 0;
3081
3082	set = single_set (insn);
3083	if (set && rtx_equal_p (SET_DEST (set), reg))
3084	break;
3085	}
3086
3087	if (code == JUMP_INSN)
3088	{
3089	if (GET_CODE (PATTERN (insn)) == RETURN)
3090	break;
3091	else if (! simplejump_p (insn)
3092	/* Prevent infinite loop following infinite loops. */
3093	\|\| jump_count++ > 20)
3094	return 0;
3095	else
3096	insn = JUMP_LABEL (insn);
3097	}
3098
3099	insn = NEXT_INSN (insn);
3100	}
3101	}
3102
3103	/* Success, the register is dead on all loop exits. */
3104	return 1;
3105	}
3106
3107	/* Try to calculate the final value of the biv, the value it will have at
3108	the end of the loop. If we can do it, return that value. */
3109
3110	rtx
3111	final_biv_value (bl, loop_start, loop_end)
3112	struct iv_class *bl;
3113	rtx loop_start, loop_end;
3114	{
3115	rtx increment, tem;
3116
3117	/* ??? This only works for MODE_INT biv's. Reject all others for now. */
3118
3119	if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3120	return 0;
3121
3122	/* The final value for reversed bivs must be calculated differently than
3123	for ordinary bivs. In this case, there is already an insn after the
3124	loop which sets this biv's final value (if necessary), and there are
3125	no other loop exits, so we can return any value. */
3126	if (bl->reversed)
3127	{
3128	if (loop_dump_stream)
3129	fprintf (loop_dump_stream,
3130	"Final biv value for %d, reversed biv.\n", bl->regno);
3131
3132	return const0_rtx;
3133	}
3134
3135	/* Try to calculate the final value as initial value + (number of iterations
3136	* increment). For this to work, increment must be invariant, the only
3137	exit from the loop must be the fall through at the bottom (otherwise
3138	it may not have its final value when the loop exits), and the initial
3139	value of the biv must be invariant. */
3140
3141	if (loop_n_iterations != 0
3142	&& ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
3143	&& invariant_p (bl->initial_value))
3144	{
3145	increment = biv_total_increment (bl, loop_start, loop_end);
3146
3147	if (increment && invariant_p (increment))
3148	{
3149	/* Can calculate the loop exit value, emit insns after loop
3150	end to calculate this value into a temporary register in
3151	case it is needed later. */
3152
3153	tem = gen_reg_rtx (bl->biv->mode);
3154	/* Make sure loop_end is not the last insn. */
3155	if (NEXT_INSN (loop_end) == 0)
3156	emit_note_after (NOTE_INSN_DELETED, loop_end);
3157	emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3158	bl->initial_value, tem, NEXT_INSN (loop_end));
3159
3160	if (loop_dump_stream)
3161	fprintf (loop_dump_stream,
3162	"Final biv value for %d, calculated.\n", bl->regno);
3163
3164	return tem;
3165	}
3166	}
3167
3168	/* Check to see if the biv is dead at all loop exits. */
3169	if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
3170	{
3171	if (loop_dump_stream)
3172	fprintf (loop_dump_stream,
3173	"Final biv value for %d, biv dead after loop exit.\n",
3174	bl->regno);
3175
3176	return const0_rtx;
3177	}
3178
3179	return 0;
3180	}
3181
3182	/* Try to calculate the final value of the giv, the value it will have at
3183	the end of the loop. If we can do it, return that value. */
3184
3185	rtx
3186	final_giv_value (v, loop_start, loop_end)
3187	struct induction *v;
3188	rtx loop_start, loop_end;
3189	{
3190	struct iv_class *bl;
3191	rtx insn;
3192	rtx increment, tem;
3193	rtx insert_before, seq;
3194
3195	bl = reg_biv_class[REGNO (v->src_reg)];
3196
3197	/* The final value for givs which depend on reversed bivs must be calculated
3198	differently than for ordinary givs. In this case, there is already an
3199	insn after the loop which sets this giv's final value (if necessary),
3200	and there are no other loop exits, so we can return any value. */
3201	if (bl->reversed)
3202	{
3203	if (loop_dump_stream)
3204	fprintf (loop_dump_stream,
3205	"Final giv value for %d, depends on reversed biv\n",
3206	REGNO (v->dest_reg));
3207	return const0_rtx;
3208	}
3209
3210	/* Try to calculate the final value as a function of the biv it depends
3211	upon. The only exit from the loop must be the fall through at the bottom
3212	(otherwise it may not have its final value when the loop exits). */
3213
3214	/* ??? Can calculate the final giv value by subtracting off the
3215	extra biv increments times the giv's mult_val. The loop must have
3216	only one exit for this to work, but the loop iterations does not need
3217	to be known. */
3218
3219	if (loop_n_iterations != 0
3220	&& ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
3221	{
3222	/* ?? It is tempting to use the biv's value here since these insns will
3223	be put after the loop, and hence the biv will have its final value
3224	then. However, this fails if the biv is subsequently eliminated.
3225	Perhaps determine whether biv's are eliminable before trying to
3226	determine whether giv's are replaceable so that we can use the
3227	biv value here if it is not eliminable. */
3228
3229	/* We are emitting code after the end of the loop, so we must make
3230	sure that bl->initial_value is still valid then. It will still
3231	be valid if it is invariant. */
3232
3233	increment = biv_total_increment (bl, loop_start, loop_end);
3234
3235	if (increment && invariant_p (increment)
3236	&& invariant_p (bl->initial_value))
3237	{
3238	/* Can calculate the loop exit value of its biv as
3239	(loop_n_iterations * increment) + initial_value */
3240
3241	/* The loop exit value of the giv is then
3242	(final_biv_value - extra increments) * mult_val + add_val.
3243	The extra increments are any increments to the biv which
3244	occur in the loop after the giv's value is calculated.
3245	We must search from the insn that sets the giv to the end
3246	of the loop to calculate this value. */
3247
3248	insert_before = NEXT_INSN (loop_end);
3249
3250	/* Put the final biv value in tem. */
3251	tem = gen_reg_rtx (bl->biv->mode);
3252	emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3253	bl->initial_value, tem, insert_before);
3254
3255	/* Subtract off extra increments as we find them. */
3256	for (insn = NEXT_INSN (v->insn); insn != loop_end;
3257	insn = NEXT_INSN (insn))
3258	{
3259	struct induction *biv;
3260
3261	for (biv = bl->biv; biv; biv = biv->next_iv)
3262	if (biv->insn == insn)
3263	{
3264	start_sequence ();
3265	tem = expand_binop (GET_MODE (tem), sub_optab, tem,
3266	biv->add_val, NULL_RTX, 0,
3267	OPTAB_LIB_WIDEN);
3268	seq = gen_sequence ();
3269	end_sequence ();
3270	emit_insn_before (seq, insert_before);
3271	}
3272	}
3273
3274	/* Now calculate the giv's final value. */
3275	emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
3276	insert_before);
3277
3278	if (loop_dump_stream)
3279	fprintf (loop_dump_stream,
3280	"Final giv value for %d, calc from biv's value.\n",
3281	REGNO (v->dest_reg));
3282
3283	return tem;
3284	}
3285	}
3286
3287	/* Replaceable giv's should never reach here. */
3288	if (v->replaceable)
3289	abort ();
3290
3291	/* Check to see if the biv is dead at all loop exits. */
3292	if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
3293	{
3294	if (loop_dump_stream)
3295	fprintf (loop_dump_stream,
3296	"Final giv value for %d, giv dead after loop exit.\n",
3297	REGNO (v->dest_reg));
3298
3299	return const0_rtx;
3300	}
3301
3302	return 0;
3303	}
3304
3305
3306	/* Calculate the number of loop iterations. Returns the exact number of loop
3307	iterations if it can be calculated, otherwise returns zero. */
3308
3309	unsigned HOST_WIDE_INT
3310	loop_iterations (loop_start, loop_end)
3311	rtx loop_start, loop_end;
3312	{
3313	rtx comparison, comparison_value;
3314	rtx iteration_var, initial_value, increment, final_value;
3315	enum rtx_code comparison_code;
3316	HOST_WIDE_INT i;
3317	int increment_dir;
3318	int unsigned_compare, compare_dir, final_larger;
3319	unsigned long tempu;
3320	rtx last_loop_insn;
3321
3322	/* First find the iteration variable. If the last insn is a conditional
3323	branch, and the insn before tests a register value, make that the
3324	iteration variable. */
3325
3326	loop_initial_value = 0;
3327	loop_increment = 0;
3328	loop_final_value = 0;
3329	loop_iteration_var = 0;
3330
3331	/* We used to use pren_nonnote_insn here, but that fails because it might
3332	accidentally get the branch for a contained loop if the branch for this
3333	loop was deleted. We can only trust branches immediately before the
3334	loop_end. */
3335	last_loop_insn = PREV_INSN (loop_end);
3336
3337	comparison = get_condition_for_loop (last_loop_insn);
3338	if (comparison == 0)
3339	{
3340	if (loop_dump_stream)
3341	fprintf (loop_dump_stream,
3342	"Loop unrolling: No final conditional branch found.\n");
3343	return 0;
3344	}
3345
3346	/* ??? Get_condition may switch position of induction variable and
3347	invariant register when it canonicalizes the comparison. */
3348
3349	comparison_code = GET_CODE (comparison);
3350	iteration_var = XEXP (comparison, 0);
3351	comparison_value = XEXP (comparison, 1);
3352
3353	if (GET_CODE (iteration_var) != REG)
3354	{
3355	if (loop_dump_stream)
3356	fprintf (loop_dump_stream,
3357	"Loop unrolling: Comparison not against register.\n");
3358	return 0;
3359	}
3360
3361	/* Loop iterations is always called before any new registers are created
3362	now, so this should never occur. */
3363
3364	if (REGNO (iteration_var) >= max_reg_before_loop)
3365	abort ();
3366
3367	iteration_info (iteration_var, &initial_value, &increment,
3368	loop_start, loop_end);
3369	if (initial_value == 0)
3370	/* iteration_info already printed a message. */
3371	return 0;
3372
3373	/* If the comparison value is an invariant register, then try to find
3374	its value from the insns before the start of the loop. */
3375
3376	if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
3377	{
3378	rtx insn, set;
3379
3380	for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
3381	{
3382	if (GET_CODE (insn) == CODE_LABEL)
3383	break;
3384
3385	else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
3386	&& reg_set_p (comparison_value, insn))
3387	{
3388	/* We found the last insn before the loop that sets the register.
3389	If it sets the entire register, and has a REG_EQUAL note,
3390	then use the value of the REG_EQUAL note. */
3391	if ((set = single_set (insn))
3392	&& (SET_DEST (set) == comparison_value))
3393	{
3394	rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3395
3396	/* Only use the REG_EQUAL note if it is a constant.
3397	Other things, divide in particular, will cause
3398	problems later if we use them. */
3399	if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3400	&& CONSTANT_P (XEXP (note, 0)))
3401	comparison_value = XEXP (note, 0);
3402	}
3403	break;
3404	}
3405	}
3406	}
3407
3408	final_value = approx_final_value (comparison_code, comparison_value,
3409	&unsigned_compare, &compare_dir);
3410
3411	/* Save the calculated values describing this loop's bounds, in case
3412	precondition_loop_p will need them later. These values can not be
3413	recalculated inside precondition_loop_p because strength reduction
3414	optimizations may obscure the loop's structure. */
3415
3416	loop_iteration_var = iteration_var;
3417	loop_initial_value = initial_value;
3418	loop_increment = increment;
3419	loop_final_value = final_value;
3420	loop_comparison_code = comparison_code;
3421
3422	if (increment == 0)
3423	{
3424	if (loop_dump_stream)
3425	fprintf (loop_dump_stream,
3426	"Loop unrolling: Increment value can't be calculated.\n");
3427	return 0;
3428	}
3429	else if (GET_CODE (increment) != CONST_INT)
3430	{
3431	if (loop_dump_stream)
3432	fprintf (loop_dump_stream,
3433	"Loop unrolling: Increment value not constant.\n");
3434	return 0;
3435	}
3436	else if (GET_CODE (initial_value) != CONST_INT)
3437	{
3438	if (loop_dump_stream)
3439	fprintf (loop_dump_stream,
3440	"Loop unrolling: Initial value not constant.\n");
3441	return 0;
3442	}
3443	else if (final_value == 0)
3444	{
3445	if (loop_dump_stream)
3446	fprintf (loop_dump_stream,
3447	"Loop unrolling: EQ comparison loop.\n");
3448	return 0;
3449	}
3450	else if (GET_CODE (final_value) != CONST_INT)
3451	{
3452	if (loop_dump_stream)
3453	fprintf (loop_dump_stream,
3454	"Loop unrolling: Final value not constant.\n");
3455	return 0;
3456	}
3457
3458	/* ?? Final value and initial value do not have to be constants.
3459	Only their difference has to be constant. When the iteration variable
3460	is an array address, the final value and initial value might both
3461	be addresses with the same base but different constant offsets.
3462	Final value must be invariant for this to work.
3463
3464	To do this, need some way to find the values of registers which are
3465	invariant. */
3466
3467	/* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
3468	if (unsigned_compare)
3469	final_larger
3470	= ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3471	> (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3472	- ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3473	< (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3474	else
3475	final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3476	- (INTVAL (final_value) < INTVAL (initial_value));
3477
3478	if (INTVAL (increment) > 0)
3479	increment_dir = 1;
3480	else if (INTVAL (increment) == 0)
3481	increment_dir = 0;
3482	else
3483	increment_dir = -1;
3484
3485	/* There are 27 different cases: compare_dir = -1, 0, 1;
3486	final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3487	There are 4 normal cases, 4 reverse cases (where the iteration variable
3488	will overflow before the loop exits), 4 infinite loop cases, and 15
3489	immediate exit (0 or 1 iteration depending on loop type) cases.
3490	Only try to optimize the normal cases. */
3491
3492	/* (compare_dir/final_larger/increment_dir)
3493	Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3494	Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3495	Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3496	Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
3497
3498	/* ?? If the meaning of reverse loops (where the iteration variable
3499	will overflow before the loop exits) is undefined, then could
3500	eliminate all of these special checks, and just always assume
3501	the loops are normal/immediate/infinite. Note that this means
3502	the sign of increment_dir does not have to be known. Also,
3503	since it does not really hurt if immediate exit loops or infinite loops
3504	are optimized, then that case could be ignored also, and hence all
3505	loops can be optimized.
3506
3507	According to ANSI Spec, the reverse loop case result is undefined,
3508	because the action on overflow is undefined.
3509
3510	See also the special test for NE loops below. */
3511
3512	if (final_larger == increment_dir && final_larger != 0
3513	&& (final_larger == compare_dir \|\| compare_dir == 0))
3514	/* Normal case. */
3515	;
3516	else
3517	{
3518	if (loop_dump_stream)
3519	fprintf (loop_dump_stream,
3520	"Loop unrolling: Not normal loop.\n");
3521	return 0;
3522	}
3523
3524	/* Calculate the number of iterations, final_value is only an approximation,
3525	so correct for that. Note that tempu and loop_n_iterations are
3526	unsigned, because they can be as large as 2^n - 1. */
3527
3528	i = INTVAL (increment);
3529	if (i > 0)
3530	tempu = INTVAL (final_value) - INTVAL (initial_value);
3531	else if (i < 0)
3532	{
3533	tempu = INTVAL (initial_value) - INTVAL (final_value);
3534	i = -i;
3535	}
3536	else
3537	abort ();
3538
3539	/* For NE tests, make sure that the iteration variable won't miss the
3540	final value. If tempu mod i is not zero, then the iteration variable
3541	will overflow before the loop exits, and we can not calculate the
3542	number of iterations. */
3543	if (compare_dir == 0 && (tempu % i) != 0)
3544	return 0;
3545
3546	return tempu / i + ((tempu % i) != 0);
3547	}
3548
3549	/* Replace uses of split bivs with their split pseudo register. This is
3550	for original instructions which remain after loop unrolling without
3551	copying. */
3552
3553	static rtx
3554	remap_split_bivs (x)
3555	rtx x;
3556	{
3557	register enum rtx_code code;
3558	register int i;
3559	register char *fmt;
3560
3561	if (x == 0)
3562	return x;
3563
3564	code = GET_CODE (x);
3565	switch (code)
3566	{
3567	case SCRATCH:
3568	case PC:
3569	case CC0:
3570	case CONST_INT:
3571	case CONST_DOUBLE:
3572	case CONST:
3573	case SYMBOL_REF:
3574	case LABEL_REF:
3575	return x;
3576
3577	case REG:
3578	#if 0
3579	/* If non-reduced/final-value givs were split, then this would also
3580	have to remap those givs also. */
3581	#endif
3582	if (REGNO (x) < max_reg_before_loop
3583	&& reg_iv_type[REGNO (x)] == BASIC_INDUCT)
3584	return reg_biv_class[REGNO (x)]->biv->src_reg;
3585	break;
3586
3587	default:
3588	break;
3589	}
3590
3591	fmt = GET_RTX_FORMAT (code);
3592	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3593	{
3594	if (fmt[i] == 'e')
3595	XEXP (x, i) = remap_split_bivs (XEXP (x, i));
3596	if (fmt[i] == 'E')
3597	{
3598	register int j;
3599	for (j = 0; j < XVECLEN (x, i); j++)
3600	XVECEXP (x, i, j) = remap_split_bivs (XVECEXP (x, i, j));
3601	}
3602	}
3603	return x;
3604	}
3605
3606	/* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
3607	FIST_UID is always executed if LAST_UID is), then return 1. Otherwise
3608	return 0. COPY_START is where we can start looking for the insns
3609	FIRST_UID and LAST_UID. COPY_END is where we stop looking for these
3610	insns.
3611
3612	If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
3613	must dominate LAST_UID.
3614
3615	If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
3616	may not dominate LAST_UID.
3617
3618	If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
3619	must dominate LAST_UID. */
3620
3621	int
3622	set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
3623	int regno;
3624	int first_uid;
3625	int last_uid;
3626	rtx copy_start;
3627	rtx copy_end;
3628	{
3629	int passed_jump = 0;
3630	rtx p = NEXT_INSN (copy_start);
3631
3632	while (INSN_UID (p) != first_uid)
3633	{
3634	if (GET_CODE (p) == JUMP_INSN)
3635	passed_jump= 1;
3636	/* Could not find FIRST_UID. */
3637	if (p == copy_end)
3638	return 0;
3639	p = NEXT_INSN (p);
3640	}
3641
3642	/* Verify that FIRST_UID is an insn that entirely sets REGNO. */
3643	if (GET_RTX_CLASS (GET_CODE (p)) != 'i'
3644	\|\| ! dead_or_set_regno_p (p, regno))
3645	return 0;
3646
3647	/* FIRST_UID is always executed. */
3648	if (passed_jump == 0)
3649	return 1;
3650
3651	while (INSN_UID (p) != last_uid)
3652	{
3653	/* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
3654	can not be sure that FIRST_UID dominates LAST_UID. */
3655	if (GET_CODE (p) == CODE_LABEL)
3656	return 0;
3657	/* Could not find LAST_UID, but we reached the end of the loop, so
3658	it must be safe. */
3659	else if (p == copy_end)
3660	return 1;
3661	p = NEXT_INSN (p);
3662	}
3663
3664	/* FIRST_UID is always executed if LAST_UID is executed. */
3665	return 1;
3666	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: